1 /* GIMPLE store merging and byte swapping passes.
2    Copyright (C) 2009-2019 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 /* The purpose of the store merging pass is to combine multiple memory stores
22    of constant values, values loaded from memory, bitwise operations on those,
23    or bit-field values, to consecutive locations, into fewer wider stores.
24 
25    For example, if we have a sequence performing four byte stores to
26    consecutive memory locations:
27    [p     ] := imm1;
28    [p + 1B] := imm2;
29    [p + 2B] := imm3;
30    [p + 3B] := imm4;
31    we can transform this into a single 4-byte store if the target supports it:
32    [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33 
34    Or:
35    [p     ] := [q     ];
36    [p + 1B] := [q + 1B];
37    [p + 2B] := [q + 2B];
38    [p + 3B] := [q + 3B];
39    if there is no overlap, this can be transformed into a single 4-byte
40    load followed by a single 4-byte store.
41 
42    Or:
43    [p     ] := [q     ] ^ imm1;
44    [p + 1B] := [q + 1B] ^ imm2;
45    [p + 2B] := [q + 2B] ^ imm3;
46    [p + 3B] := [q + 3B] ^ imm4;
47    if there is no overlap, this can be transformed into a single 4-byte
48    load, XORed with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49 
50    Or:
51    [p:1 ] := imm;
52    [p:31] := val & 0x7FFFFFFF;
53    we can transform this into a single 4-byte store if the target supports it:
54    [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
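
   As a purely illustrative source-level sketch, four adjacent byte-sized
   fields initialized as

     struct S { unsigned char a, b, c, d; } s;
     s.a = 1; s.b = 2; s.c = 3; s.d = 4;

   give rise to four single-byte stores at consecutive offsets, which can be
   merged into a single 4-byte store of the constant 0x04030201 on a
   little-endian target (0x01020304 on a big-endian one), subject to the same
   target support checks as above.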
55 
56    The algorithm is applied to each basic block in three phases:
57 
58    1) Scan through the basic block and record assignments to destinations
59    that can be expressed as a store to memory of a certain size at a certain
60    bit offset from base expressions we can handle.  For bit-fields we also
61    record the surrounding bit region, i.e. bits that could be stored in
62    a read-modify-write operation when storing the bit-field.  Record store
63    chains to different bases in a hash_map (m_stores) and make sure to
64    terminate such chains when appropriate (for example when the stored
65    values get used subsequently).
66    These stores can be a result of structure element initializers, array stores
67    etc.  A store_immediate_info object is recorded for every such store.
68    Record as many such assignments to a single base as possible until a
69    statement that interferes with the store sequence is encountered.
70    Each store has up to 2 operands, which can be either a constant, a memory
71    load or an SSA name, from which the value to be stored can be computed.
72    At most one of the operands can be a constant.  The operands are recorded
73    in store_operand_info struct.
74 
75    2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76    store_immediate_info objects) and coalesce contiguous stores into
77    merged_store_group objects.  For bit-field stores, we don't need to
78    require the stores to be contiguous, just their surrounding bit regions
79    have to be contiguous.  If the expression being stored is different
80    between adjacent stores, such as one store storing a constant and the
81    following one storing a value loaded from memory, or if the loaded memory
82    objects are not adjacent, a new merged_store_group is created as well.
83 
84    For example, given the stores:
85    [p     ] := 0;
86    [p + 1B] := 1;
87    [p + 3B] := 0;
88    [p + 4B] := 1;
89    [p + 5B] := 0;
90    [p + 6B] := 0;
91    This phase would produce two merged_store_group objects, one recording the
92    two bytes stored in the memory region [p : p + 1] and another
93    recording the four bytes stored in the memory region [p + 3 : p + 6].
94 
95    3) The merged_store_group objects produced in phase 2) are processed
96    to generate the sequence of wider stores that set the contiguous memory
97    regions to the sequence of bytes that corresponds to them.  This may emit
98    multiple stores per store group to handle contiguous stores that are not
99    of a size that is a power of 2.  For example it can try to emit a 40-bit
100    store as a 32-bit store followed by an 8-bit store.
101    We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102    or TARGET_SLOW_UNALIGNED_ACCESS settings.
103 
104    Note on endianness and example:
105    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106    [p     ] := 0x1234;
107    [p + 2B] := 0x5678;
108    [p + 4B] := 0xab;
109    [p + 5B] := 0xcd;
110 
111    The memory layout for little-endian (LE) and big-endian (BE) must be:
112   p |LE|BE|
113   ---------
114   0 |34|12|
115   1 |12|34|
116   2 |78|56|
117   3 |56|78|
118   4 |ab|ab|
119   5 |cd|cd|
120 
121   To merge these into a single 48-bit merged value 'val' in phase 2)
122   on little-endian we insert stores to higher (consecutive) bitpositions
123   into the most significant bits of the merged value.
124   The final merged value would be: 0xcdab56781234
125 
126   For big-endian we insert stores to higher bitpositions into the least
127   significant bits of the merged value.
128   The final merged value would be: 0x12345678abcd
129 
130   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131   followed by a 16-bit store.  Again, we must consider endianness when
132   breaking down the 48-bit value 'val' computed above.
133   For little endian we emit:
134   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135   [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
136 
137   Whereas for big-endian we emit:
138   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
140 
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "params.h"
155 #include "print-tree.h"
156 #include "tree-hash-traits.h"
157 #include "gimple-iterator.h"
158 #include "gimplify.h"
159 #include "gimple-fold.h"
160 #include "stor-layout.h"
161 #include "timevar.h"
162 #include "tree-cfg.h"
163 #include "tree-eh.h"
164 #include "target.h"
165 #include "gimplify-me.h"
166 #include "rtl.h"
167 #include "expr.h"	/* For get_bit_range.  */
168 #include "optabs-tree.h"
169 #include "selftest.h"
170 
171 /* The maximum size (in bits) of the stores this pass should generate.  */
172 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
173 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
174 
175 /* Limit to bound the number of aliasing checks for loads with the same
176    vuse as the corresponding store.  */
177 #define MAX_STORE_ALIAS_CHECKS 64
178 
179 namespace {
180 
181 struct bswap_stat
182 {
183   /* Number of hand-written 16-bit nop / bswaps found.  */
184   int found_16bit;
185 
186   /* Number of hand-written 32-bit nop / bswaps found.  */
187   int found_32bit;
188 
189   /* Number of hand-written 64-bit nop / bswaps found.  */
190   int found_64bit;
191 } nop_stats, bswap_stats;
192 
193 /* A symbolic number structure is used to detect byte permutation and selection
194    patterns of a source.  To achieve that, its field N contains an artificial
195    number consisting of BITS_PER_MARKER sized markers tracking where does each
196    byte come from in the source:
197 
198    0	   - target byte has the value 0
199    FF	   - target byte has an unknown value (e.g. due to sign extension)
200    1..size - marker value is the byte index in the source plus one (1 for lsb).
201 
202    To detect permutations on memory sources (arrays and structures), a symbolic
203    number is also associated:
204    - a base address BASE_ADDR and an OFFSET giving the address of the source;
205    - a range which gives the difference between the highest and lowest accessed
206      memory location to make such a symbolic number;
207    - the address SRC of the source element of lowest address as a convenience
208      to easily get BASE_ADDR + offset + lowest bytepos;
209    - the number of expressions N_OPS bitwise ORed together to represent the
210      approximate cost of the computation.
211 
212    Note 1: the range is different from size as size reflects the size of the
213    type of the current expression.  For instance, for an array char a[],
214    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
215    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
216    time a range of 1.
217 
218    Note 2: for non-memory sources, range holds the same value as size.
219 
220    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
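
/* As a small illustration (assuming 8-bit bytes): right after
   init_symbolic_number on a 4-byte source the symbolic number is 0x04030201,
   i.e. target byte 0 (the lsb) carries marker 1 (the least significant source
   byte), target byte 1 carries marker 2, and so on.  A computation that
   reverses the four bytes instead ends up with 0x01020304, which is what the
   byte swap detection below compares against.  */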
221 
222 struct symbolic_number {
223   uint64_t n;
224   tree type;
225   tree base_addr;
226   tree offset;
227   poly_int64_pod bytepos;
228   tree src;
229   tree alias_set;
230   tree vuse;
231   unsigned HOST_WIDE_INT range;
232   int n_ops;
233 };
234 
235 #define BITS_PER_MARKER 8
236 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
237 #define MARKER_BYTE_UNKNOWN MARKER_MASK
238 #define HEAD_MARKER(n, size) \
239   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
240 
241 /* The number which the find_bswap_or_nop_1 result should match in
242    order to have a nop.  The number is masked according to the size of
243    the symbolic number before using it.  */
244 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
245   (uint64_t)0x08070605 << 32 | 0x04030201)
246 
247 /* The number which the find_bswap_or_nop_1 result should match in
248    order to have a byte swap.  The number is masked according to the
249    size of the symbolic number before using it.  */
250 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
251   (uint64_t)0x01020304 << 32 | 0x05060708)
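
/* For example, for a 2-byte symbolic number the comparison values, once
   reduced to its size in find_bswap_or_nop_finalize, are 0x0201 for a nop
   and 0x0102 for a byte swap.  */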
252 
253 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
254    number N.  Return false if the requested operation is not permitted
255    on a symbolic number.  */
256 
257 inline bool
258 do_shift_rotate (enum tree_code code,
259 		 struct symbolic_number *n,
260 		 int count)
261 {
262   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
263   unsigned head_marker;
264 
265   if (count < 0
266       || count >= TYPE_PRECISION (n->type)
267       || count % BITS_PER_UNIT != 0)
268     return false;
269   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
270 
271   /* Zero out the extra bits of N in order to avoid them being shifted
272      into the significant bits.  */
273   if (size < 64 / BITS_PER_MARKER)
274     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
275 
276   switch (code)
277     {
278     case LSHIFT_EXPR:
279       n->n <<= count;
280       break;
281     case RSHIFT_EXPR:
282       head_marker = HEAD_MARKER (n->n, size);
283       n->n >>= count;
284       /* Arithmetic shift of signed type: result is dependent on the value.  */
285       if (!TYPE_UNSIGNED (n->type) && head_marker)
286 	for (i = 0; i < count / BITS_PER_MARKER; i++)
287 	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
288 		  << ((size - 1 - i) * BITS_PER_MARKER);
289       break;
290     case LROTATE_EXPR:
291       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
292       break;
293     case RROTATE_EXPR:
294       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
295       break;
296     default:
297       return false;
298     }
299   /* Zero unused bits for size.  */
300   if (size < 64 / BITS_PER_MARKER)
301     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
302   return true;
303 }
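
/* A short worked example of the above, assuming 8-bit bytes: for a 32-bit
   symbolic number with N == 0x04030201 (the identity mapping of the four
   source bytes), do_shift_rotate (LSHIFT_EXPR, n, 8) leaves N == 0x03020100,
   i.e. the least significant result byte is known to be zero and the upper
   three result bytes carry markers 1, 2 and 3, just as "x << 8" behaves on
   the actual value.  */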
304 
305 /* Perform sanity checking for the symbolic number N and the gimple
306    statement STMT.  */
307 
308 inline bool
309 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
310 {
311   tree lhs_type;
312 
313   lhs_type = gimple_expr_type (stmt);
314 
315   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
316     return false;
317 
318   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
319     return false;
320 
321   return true;
322 }
323 
324 /* Initialize the symbolic number N for the bswap pass from the base element
325    SRC manipulated by the bitwise OR expression.  */
326 
327 bool
328 init_symbolic_number (struct symbolic_number *n, tree src)
329 {
330   int size;
331 
332   if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
333     return false;
334 
335   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
336   n->src = src;
337 
338   /* Set up the symbolic number N by setting each byte to a value between 1 and
339      the byte size of SRC.  The highest order byte is set to that size and the
340      lowest order byte to 1.  */
341   n->type = TREE_TYPE (src);
342   size = TYPE_PRECISION (n->type);
343   if (size % BITS_PER_UNIT != 0)
344     return false;
345   size /= BITS_PER_UNIT;
346   if (size > 64 / BITS_PER_MARKER)
347     return false;
348   n->range = size;
349   n->n = CMPNOP;
350   n->n_ops = 1;
351 
352   if (size < 64 / BITS_PER_MARKER)
353     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
354 
355   return true;
356 }
357 
358 /* Check if STMT might be a byte swap or a nop from a memory source and return
359    the answer.  If so, REF is that memory source and the base of the memory area
360    accessed and the offset of the access from that base are recorded in N.  */
361 
362 bool
363 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
364 {
365   /* Leaf node is an array or component ref. Memorize its base and
366      offset from base to compare to other such leaf node.  */
367   poly_int64 bitsize, bitpos, bytepos;
368   machine_mode mode;
369   int unsignedp, reversep, volatilep;
370   tree offset, base_addr;
371 
372   /* Not prepared to handle PDP endian.  */
373   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
374     return false;
375 
376   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
377     return false;
378 
379   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
380 				   &unsignedp, &reversep, &volatilep);
381 
382   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
383     /* Do not rewrite TARGET_MEM_REF.  */
384     return false;
385   else if (TREE_CODE (base_addr) == MEM_REF)
386     {
387       poly_offset_int bit_offset = 0;
388       tree off = TREE_OPERAND (base_addr, 1);
389 
390       if (!integer_zerop (off))
391 	{
392 	  poly_offset_int boff = mem_ref_offset (base_addr);
393 	  boff <<= LOG2_BITS_PER_UNIT;
394 	  bit_offset += boff;
395 	}
396 
397       base_addr = TREE_OPERAND (base_addr, 0);
398 
399       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
400       if (maybe_lt (bit_offset, 0))
401 	{
402 	  tree byte_offset = wide_int_to_tree
403 	    (sizetype, bits_to_bytes_round_down (bit_offset));
404 	  bit_offset = num_trailing_bits (bit_offset);
405 	  if (offset)
406 	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
407 	  else
408 	    offset = byte_offset;
409 	}
410 
411       bitpos += bit_offset.force_shwi ();
412     }
413   else
414     base_addr = build_fold_addr_expr (base_addr);
415 
416   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
417     return false;
418   if (!multiple_p (bitsize, BITS_PER_UNIT))
419     return false;
420   if (reversep)
421     return false;
422 
423   if (!init_symbolic_number (n, ref))
424     return false;
425   n->base_addr = base_addr;
426   n->offset = offset;
427   n->bytepos = bytepos;
428   n->alias_set = reference_alias_ptr_type (ref);
429   n->vuse = gimple_vuse (stmt);
430   return true;
431 }
432 
433 /* Compute the symbolic number N representing the result of a bitwise OR on 2
434    symbolic numbers N1 and N2 whose source statements are respectively
435    SOURCE_STMT1 and SOURCE_STMT2.  */
436 
437 gimple *
438 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
439 			gimple *source_stmt2, struct symbolic_number *n2,
440 			struct symbolic_number *n)
441 {
442   int i, size;
443   uint64_t mask;
444   gimple *source_stmt;
445   struct symbolic_number *n_start;
446 
447   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
448   if (TREE_CODE (rhs1) == BIT_FIELD_REF
449       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
450     rhs1 = TREE_OPERAND (rhs1, 0);
451   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
452   if (TREE_CODE (rhs2) == BIT_FIELD_REF
453       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
454     rhs2 = TREE_OPERAND (rhs2, 0);
455 
456   /* Sources are different, cancel bswap if they are not memory locations with
457      the same base (array, structure, ...).  */
458   if (rhs1 != rhs2)
459     {
460       uint64_t inc;
461       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
462       struct symbolic_number *toinc_n_ptr, *n_end;
463       basic_block bb1, bb2;
464 
465       if (!n1->base_addr || !n2->base_addr
466 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
467 	return NULL;
468 
469       if (!n1->offset != !n2->offset
470 	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
471 	return NULL;
472 
473       start1 = 0;
474       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
475 	return NULL;
476 
477       if (start1 < start2)
478 	{
479 	  n_start = n1;
480 	  start_sub = start2 - start1;
481 	}
482       else
483 	{
484 	  n_start = n2;
485 	  start_sub = start1 - start2;
486 	}
487 
488       bb1 = gimple_bb (source_stmt1);
489       bb2 = gimple_bb (source_stmt2);
490       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
491 	source_stmt = source_stmt1;
492       else
493 	source_stmt = source_stmt2;
494 
495       /* Find the highest address at which a load is performed and
496 	 compute related info.  */
497       end1 = start1 + (n1->range - 1);
498       end2 = start2 + (n2->range - 1);
499       if (end1 < end2)
500 	{
501 	  end = end2;
502 	  end_sub = end2 - end1;
503 	}
504       else
505 	{
506 	  end = end1;
507 	  end_sub = end1 - end2;
508 	}
509       n_end = (end2 > end1) ? n2 : n1;
510 
511       /* Find symbolic number whose lsb is the most significant.  */
512       if (BYTES_BIG_ENDIAN)
513 	toinc_n_ptr = (n_end == n1) ? n2 : n1;
514       else
515 	toinc_n_ptr = (n_start == n1) ? n2 : n1;
516 
517       n->range = end - MIN (start1, start2) + 1;
518 
519       /* Check that the range of memory covered can be represented by
520 	 a symbolic number.  */
521       if (n->range > 64 / BITS_PER_MARKER)
522 	return NULL;
523 
524       /* Reinterpret byte marks in symbolic number holding the value of
525 	 bigger weight according to target endianness.  */
526       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
527       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
528       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
529 	{
530 	  unsigned marker
531 	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
532 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
533 	    toinc_n_ptr->n += inc;
534 	}
535     }
536   else
537     {
538       n->range = n1->range;
539       n_start = n1;
540       source_stmt = source_stmt1;
541     }
542 
543   if (!n1->alias_set
544       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
545     n->alias_set = n1->alias_set;
546   else
547     n->alias_set = ptr_type_node;
548   n->vuse = n_start->vuse;
549   n->base_addr = n_start->base_addr;
550   n->offset = n_start->offset;
551   n->src = n_start->src;
552   n->bytepos = n_start->bytepos;
553   n->type = n_start->type;
554   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
555 
556   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
557     {
558       uint64_t masked1, masked2;
559 
560       masked1 = n1->n & mask;
561       masked2 = n2->n & mask;
562       if (masked1 && masked2 && masked1 != masked2)
563 	return NULL;
564     }
565   n->n = n1->n | n2->n;
566   n->n_ops = n1->n_ops + n2->n_ops;
567 
568   return source_stmt;
569 }
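
/* A minimal illustration of the merge on a little-endian target: for
   "(unsigned short) a[0] | ((unsigned short) a[1] << 8)" the two operands
   reach this point with N1 == 0x01 at byte position 0 and N2 == 0x0100 at
   byte position 1.  The markers of N2 are bumped by the byte distance
   between the two loads, giving 0x0200, and the merged number becomes
   0x0201 with a range of 2, i.e. a plain read of bytes a[0..1] in target
   order.  */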
570 
571 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
572    the operation given by the rhs of STMT on the result.  If the operation
573    could successfully be executed, the function returns a gimple stmt whose
574    rhs's first tree is the expression of the source operand, and NULL
575    otherwise.  */
576 
577 gimple *
578 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
579 {
580   enum tree_code code;
581   tree rhs1, rhs2 = NULL;
582   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
583   enum gimple_rhs_class rhs_class;
584 
585   if (!limit || !is_gimple_assign (stmt))
586     return NULL;
587 
588   rhs1 = gimple_assign_rhs1 (stmt);
589 
590   if (find_bswap_or_nop_load (stmt, rhs1, n))
591     return stmt;
592 
593   /* Handle BIT_FIELD_REF.  */
594   if (TREE_CODE (rhs1) == BIT_FIELD_REF
595       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
596     {
597       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
598       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
599       if (bitpos % BITS_PER_UNIT == 0
600 	  && bitsize % BITS_PER_UNIT == 0
601 	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
602 	{
603 	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
604 	  if (BYTES_BIG_ENDIAN)
605 	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
606 
607 	  /* Shift.  */
608 	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
609 	    return NULL;
610 
611 	  /* Mask.  */
612 	  uint64_t mask = 0;
613 	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
614 	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
615 	       i++, tmp <<= BITS_PER_UNIT)
616 	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
617 	  n->n &= mask;
618 
619 	  /* Convert.  */
620 	  n->type = TREE_TYPE (rhs1);
621 	  if (!n->base_addr)
622 	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
623 
624 	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
625 	}
626 
627       return NULL;
628     }
629 
630   if (TREE_CODE (rhs1) != SSA_NAME)
631     return NULL;
632 
633   code = gimple_assign_rhs_code (stmt);
634   rhs_class = gimple_assign_rhs_class (stmt);
635   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
636 
637   if (rhs_class == GIMPLE_BINARY_RHS)
638     rhs2 = gimple_assign_rhs2 (stmt);
639 
640   /* Handle unary rhs and binary rhs with integer constants as second
641      operand.  */
642 
643   if (rhs_class == GIMPLE_UNARY_RHS
644       || (rhs_class == GIMPLE_BINARY_RHS
645 	  && TREE_CODE (rhs2) == INTEGER_CST))
646     {
647       if (code != BIT_AND_EXPR
648 	  && code != LSHIFT_EXPR
649 	  && code != RSHIFT_EXPR
650 	  && code != LROTATE_EXPR
651 	  && code != RROTATE_EXPR
652 	  && !CONVERT_EXPR_CODE_P (code))
653 	return NULL;
654 
655       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
656 
657       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
658 	 we have to initialize the symbolic number.  */
659       if (!source_stmt1)
660 	{
661 	  if (gimple_assign_load_p (stmt)
662 	      || !init_symbolic_number (n, rhs1))
663 	    return NULL;
664 	  source_stmt1 = stmt;
665 	}
666 
667       switch (code)
668 	{
669 	case BIT_AND_EXPR:
670 	  {
671 	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
672 	    uint64_t val = int_cst_value (rhs2), mask = 0;
673 	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
674 
675 	    /* Only constants masking full bytes are allowed.  */
676 	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
677 	      if ((val & tmp) != 0 && (val & tmp) != tmp)
678 		return NULL;
679 	      else if (val & tmp)
680 		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
681 
682 	    n->n &= mask;
683 	  }
684 	  break;
685 	case LSHIFT_EXPR:
686 	case RSHIFT_EXPR:
687 	case LROTATE_EXPR:
688 	case RROTATE_EXPR:
689 	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
690 	    return NULL;
691 	  break;
692 	CASE_CONVERT:
693 	  {
694 	    int i, type_size, old_type_size;
695 	    tree type;
696 
697 	    type = gimple_expr_type (stmt);
698 	    type_size = TYPE_PRECISION (type);
699 	    if (type_size % BITS_PER_UNIT != 0)
700 	      return NULL;
701 	    type_size /= BITS_PER_UNIT;
702 	    if (type_size > 64 / BITS_PER_MARKER)
703 	      return NULL;
704 
705 	    /* Sign extension: result is dependent on the value.  */
706 	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
707 	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
708 		&& HEAD_MARKER (n->n, old_type_size))
709 	      for (i = 0; i < type_size - old_type_size; i++)
710 		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
711 			<< ((type_size - 1 - i) * BITS_PER_MARKER);
712 
713 	    if (type_size < 64 / BITS_PER_MARKER)
714 	      {
715 		/* If STMT casts to a smaller type mask out the bits not
716 		   belonging to the target type.  */
717 		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
718 	      }
719 	    n->type = type;
720 	    if (!n->base_addr)
721 	      n->range = type_size;
722 	  }
723 	  break;
724 	default:
725 	  return NULL;
726 	};
727       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
728     }
729 
730   /* Handle binary rhs.  */
731 
732   if (rhs_class == GIMPLE_BINARY_RHS)
733     {
734       struct symbolic_number n1, n2;
735       gimple *source_stmt, *source_stmt2;
736 
737       if (code != BIT_IOR_EXPR)
738 	return NULL;
739 
740       if (TREE_CODE (rhs2) != SSA_NAME)
741 	return NULL;
742 
743       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
744 
745       switch (code)
746 	{
747 	case BIT_IOR_EXPR:
748 	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
749 
750 	  if (!source_stmt1)
751 	    return NULL;
752 
753 	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
754 
755 	  if (!source_stmt2)
756 	    return NULL;
757 
758 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
759 	    return NULL;
760 
761 	  if (n1.vuse != n2.vuse)
762 	    return NULL;
763 
764 	  source_stmt
765 	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
766 
767 	  if (!source_stmt)
768 	    return NULL;
769 
770 	  if (!verify_symbolic_number_p (n, stmt))
771 	    return NULL;
772 
773 	  break;
774 	default:
775 	  return NULL;
776 	}
777       return source_stmt;
778     }
779   return NULL;
780 }
781 
782 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
783    *CMPXCHG, *CMPNOP and adjust *N.  */
784 
785 void
786 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
787 			    uint64_t *cmpnop)
788 {
789   unsigned rsize;
790   uint64_t tmpn, mask;
791 
792   /* The number which the find_bswap_or_nop_1 result should match in order
793      to have a full byte swap.  The number is shifted to the right
794      according to the size of the symbolic number before using it.  */
795   *cmpxchg = CMPXCHG;
796   *cmpnop = CMPNOP;
797 
798   /* Find real size of result (highest non-zero byte).  */
799   if (n->base_addr)
800     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
801   else
802     rsize = n->range;
803 
804   /* Zero out the bits corresponding to untouched bytes in original gimple
805      expression.  */
806   if (n->range < (int) sizeof (int64_t))
807     {
808       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
809       *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
810       *cmpnop &= mask;
811     }
812 
813   /* Zero out the bits corresponding to unused bytes in the result of the
814      gimple expression.  */
815   if (rsize < n->range)
816     {
817       if (BYTES_BIG_ENDIAN)
818 	{
819 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
820 	  *cmpxchg &= mask;
821 	  *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
822 	}
823       else
824 	{
825 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
826 	  *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
827 	  *cmpnop &= mask;
828 	}
829       n->range = rsize;
830     }
831 
832   n->range *= BITS_PER_UNIT;
833 }
834 
835 /* Check if STMT completes a bswap implementation or a read in a given
836    endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
837    accordingly.  It also sets N to represent the kind of operations
838    performed: size of the resulting expression and whether it works on
839    a memory source, and if so alias-set and vuse.  Finally, the
840    function returns a stmt whose rhs's first tree is the source
841    expression.  */
842 
843 gimple *
844 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
845 {
846   /* The last parameter determines the depth search limit.  It usually
847      correlates directly to the number n of bytes to be touched.  We
848      increase that number by log2(n) + 1 here in order to also
849      cover signed -> unsigned conversions of the src operand as can be seen
850      in libgcc, and for initial shift/and operation of the src operand.  */
851   int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
852   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
853   gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
854 
855   if (!ins_stmt)
856     return NULL;
857 
858   uint64_t cmpxchg, cmpnop;
859   find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
860 
861   /* A complete byte swap should make the symbolic number start with the
862      largest digit in the highest order byte.  An unchanged symbolic number
863      indicates a read with the same endianness as the target architecture.  */
864   if (n->n == cmpnop)
865     *bswap = false;
866   else if (n->n == cmpxchg)
867     *bswap = true;
868   else
869     return NULL;
870 
871   /* Useless bit manipulation performed by code.  */
872   if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
873     return NULL;
874 
875   return ins_stmt;
876 }
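
/* As an illustration of what the detection above recognizes, a hand-written
   byte swap along the lines of the (purely illustrative) function

     unsigned int
     swap32 (unsigned int x)
     {
       return (x >> 24)
              | ((x >> 8) & 0x0000ff00)
              | ((x << 8) & 0x00ff0000)
              | (x << 24);
     }

   produces a symbolic number matching CMPXCHG and is reported with *BSWAP set
   to true, whereas a computation that merely reassembles the bytes in their
   original order matches CMPNOP and is reported as a nop.  */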
877 
878 const pass_data pass_data_optimize_bswap =
879 {
880   GIMPLE_PASS, /* type */
881   "bswap", /* name */
882   OPTGROUP_NONE, /* optinfo_flags */
883   TV_NONE, /* tv_id */
884   PROP_ssa, /* properties_required */
885   0, /* properties_provided */
886   0, /* properties_destroyed */
887   0, /* todo_flags_start */
888   0, /* todo_flags_finish */
889 };
890 
891 class pass_optimize_bswap : public gimple_opt_pass
892 {
893 public:
894   pass_optimize_bswap (gcc::context *ctxt)
895     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
896   {}
897 
898   /* opt_pass methods: */
899   virtual bool gate (function *)
900     {
901       return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
902     }
903 
904   virtual unsigned int execute (function *);
905 
906 }; // class pass_optimize_bswap
907 
908 /* Perform the bswap optimization: replace the expression computed in the rhs
909    of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
910    bswap, load or load + bswap expression.
911    Which of these alternatives replaces the rhs is given by N->base_addr
912    (non-null if a load is needed) and BSWAP.  The type, VUSE and alias set
913    of the load to perform are also given in N, while the builtin bswap
914    invocation is given in FNDECL.  Finally, if a load is involved, INS_STMT
915    refers to one of the load statements used to construct the rhs in
916    gsi_stmt (GSI) and N->range gives the size of the rhs expression for
917    maintaining some statistics.
918 
919    Note that if the replacement involves a load and if gsi_stmt (GSI) is
920    non-NULL, that stmt is moved just after INS_STMT to do the load with the
921    same VUSE, which can move gsi_stmt (GSI) to a different basic block.  */
922 
923 tree
924 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
925 	       tree bswap_type, tree load_type, struct symbolic_number *n,
926 	       bool bswap)
927 {
928   tree src, tmp, tgt = NULL_TREE;
929   gimple *bswap_stmt;
930 
931   gimple *cur_stmt = gsi_stmt (gsi);
932   src = n->src;
933   if (cur_stmt)
934     tgt = gimple_assign_lhs (cur_stmt);
935 
936   /* Need to load the value from memory first.  */
937   if (n->base_addr)
938     {
939       gimple_stmt_iterator gsi_ins = gsi;
940       if (ins_stmt)
941 	gsi_ins = gsi_for_stmt (ins_stmt);
942       tree addr_expr, addr_tmp, val_expr, val_tmp;
943       tree load_offset_ptr, aligned_load_type;
944       gimple *load_stmt;
945       unsigned align = get_object_alignment (src);
946       poly_int64 load_offset = 0;
947 
948       if (cur_stmt)
949 	{
950 	  basic_block ins_bb = gimple_bb (ins_stmt);
951 	  basic_block cur_bb = gimple_bb (cur_stmt);
952 	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
953 	    return NULL_TREE;
954 
955 	  /* Move cur_stmt just before one of the load of the original
956 	     to ensure it has the same VUSE.  See PR61517 for what could
957 	     go wrong.  */
958 	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
959 	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
960 	  gsi_move_before (&gsi, &gsi_ins);
961 	  gsi = gsi_for_stmt (cur_stmt);
962 	}
963       else
964 	gsi = gsi_ins;
965 
966       /* Compute address to load from and cast according to the size
967 	 of the load.  */
968       addr_expr = build_fold_addr_expr (src);
969       if (is_gimple_mem_ref_addr (addr_expr))
970 	addr_tmp = unshare_expr (addr_expr);
971       else
972 	{
973 	  addr_tmp = unshare_expr (n->base_addr);
974 	  if (!is_gimple_mem_ref_addr (addr_tmp))
975 	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
976 						   is_gimple_mem_ref_addr,
977 						   NULL_TREE, true,
978 						   GSI_SAME_STMT);
979 	  load_offset = n->bytepos;
980 	  if (n->offset)
981 	    {
982 	      tree off
983 		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
984 					    true, NULL_TREE, true,
985 					    GSI_SAME_STMT);
986 	      gimple *stmt
987 		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
988 				       POINTER_PLUS_EXPR, addr_tmp, off);
989 	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
990 	      addr_tmp = gimple_assign_lhs (stmt);
991 	    }
992 	}
993 
994       /* Perform the load.  */
995       aligned_load_type = load_type;
996       if (align < TYPE_ALIGN (load_type))
997 	aligned_load_type = build_aligned_type (load_type, align);
998       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
999       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1000 			      load_offset_ptr);
1001 
1002       if (!bswap)
1003 	{
1004 	  if (n->range == 16)
1005 	    nop_stats.found_16bit++;
1006 	  else if (n->range == 32)
1007 	    nop_stats.found_32bit++;
1008 	  else
1009 	    {
1010 	      gcc_assert (n->range == 64);
1011 	      nop_stats.found_64bit++;
1012 	    }
1013 
1014 	  /* Convert the result of load if necessary.  */
1015 	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1016 	    {
1017 	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1018 					    "load_dst");
1019 	      load_stmt = gimple_build_assign (val_tmp, val_expr);
1020 	      gimple_set_vuse (load_stmt, n->vuse);
1021 	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1022 	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1023 	      update_stmt (cur_stmt);
1024 	    }
1025 	  else if (cur_stmt)
1026 	    {
1027 	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1028 	      gimple_set_vuse (cur_stmt, n->vuse);
1029 	      update_stmt (cur_stmt);
1030 	    }
1031 	  else
1032 	    {
1033 	      tgt = make_ssa_name (load_type);
1034 	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1035 	      gimple_set_vuse (cur_stmt, n->vuse);
1036 	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1037 	    }
1038 
1039 	  if (dump_file)
1040 	    {
1041 	      fprintf (dump_file,
1042 		       "%d bit load in target endianness found at: ",
1043 		       (int) n->range);
1044 	      print_gimple_stmt (dump_file, cur_stmt, 0);
1045 	    }
1046 	  return tgt;
1047 	}
1048       else
1049 	{
1050 	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1051 	  load_stmt = gimple_build_assign (val_tmp, val_expr);
1052 	  gimple_set_vuse (load_stmt, n->vuse);
1053 	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1054 	}
1055       src = val_tmp;
1056     }
1057   else if (!bswap)
1058     {
1059       gimple *g = NULL;
1060       if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1061 	{
1062 	  if (!is_gimple_val (src))
1063 	    return NULL_TREE;
1064 	  g = gimple_build_assign (tgt, NOP_EXPR, src);
1065 	}
1066       else if (cur_stmt)
1067 	g = gimple_build_assign (tgt, src);
1068       else
1069 	tgt = src;
1070       if (n->range == 16)
1071 	nop_stats.found_16bit++;
1072       else if (n->range == 32)
1073 	nop_stats.found_32bit++;
1074       else
1075 	{
1076 	  gcc_assert (n->range == 64);
1077 	  nop_stats.found_64bit++;
1078 	}
1079       if (dump_file)
1080 	{
1081 	  fprintf (dump_file,
1082 		   "%d bit reshuffle in target endianness found at: ",
1083 		   (int) n->range);
1084 	  if (cur_stmt)
1085 	    print_gimple_stmt (dump_file, cur_stmt, 0);
1086 	  else
1087 	    {
1088 	      print_generic_expr (dump_file, tgt, TDF_NONE);
1089 	      fprintf (dump_file, "\n");
1090 	    }
1091 	}
1092       if (cur_stmt)
1093 	gsi_replace (&gsi, g, true);
1094       return tgt;
1095     }
1096   else if (TREE_CODE (src) == BIT_FIELD_REF)
1097     src = TREE_OPERAND (src, 0);
1098 
1099   if (n->range == 16)
1100     bswap_stats.found_16bit++;
1101   else if (n->range == 32)
1102     bswap_stats.found_32bit++;
1103   else
1104     {
1105       gcc_assert (n->range == 64);
1106       bswap_stats.found_64bit++;
1107     }
1108 
1109   tmp = src;
1110 
1111   /* Convert the src expression if necessary.  */
1112   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1113     {
1114       gimple *convert_stmt;
1115 
1116       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1117       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1118       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1119     }
1120 
1121   /* The canonical form for a 16-bit bswap is a rotate expression.  Only 16-bit
1122      values are handled this way, as rotating a 2N-bit value by N bits is in
1123      general not equivalent to a bswap.  Consider for instance 0x01020304 r>> 16,
1124      which yields 0x03040102 while a bswap of that value is 0x04030201.  */
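  /* For the 16-bit case itself the rotate and the byte swap do coincide:
     0x1234 r>> 8 gives 0x3412, the byte-swapped value.  */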
1125   if (bswap && n->range == 16)
1126     {
1127       tree count = build_int_cst (NULL, BITS_PER_UNIT);
1128       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1129       bswap_stmt = gimple_build_assign (NULL, src);
1130     }
1131   else
1132     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1133 
1134   if (tgt == NULL_TREE)
1135     tgt = make_ssa_name (bswap_type);
1136   tmp = tgt;
1137 
1138   /* Convert the result if necessary.  */
1139   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1140     {
1141       gimple *convert_stmt;
1142 
1143       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1144       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1145       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1146     }
1147 
1148   gimple_set_lhs (bswap_stmt, tmp);
1149 
1150   if (dump_file)
1151     {
1152       fprintf (dump_file, "%d bit bswap implementation found at: ",
1153 	       (int) n->range);
1154       if (cur_stmt)
1155 	print_gimple_stmt (dump_file, cur_stmt, 0);
1156       else
1157 	{
1158 	  print_generic_expr (dump_file, tgt, TDF_NONE);
1159 	  fprintf (dump_file, "\n");
1160 	}
1161     }
1162 
1163   if (cur_stmt)
1164     {
1165       gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1166       gsi_remove (&gsi, true);
1167     }
1168   else
1169     gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1170   return tgt;
1171 }
1172 
1173 /* Find manual byte swap implementations as well as loads in a given
1174    endianness.  Byte swaps are turned into a bswap builtin invocation
1175    while endian loads are converted to a bswap builtin invocation or a
1176    simple load according to the target endianness.  */
1177 
1178 unsigned int
1179 pass_optimize_bswap::execute (function *fun)
1180 {
1181   basic_block bb;
1182   bool bswap32_p, bswap64_p;
1183   bool changed = false;
1184   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1185 
1186   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1187 	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1188   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1189 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1190 		   || (bswap32_p && word_mode == SImode)));
1191 
1192   /* Determine the argument type of the builtins.  The code later on
1193      assumes that the return and argument type are the same.  */
1194   if (bswap32_p)
1195     {
1196       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1197       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1198     }
1199 
1200   if (bswap64_p)
1201     {
1202       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1203       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1204     }
1205 
1206   memset (&nop_stats, 0, sizeof (nop_stats));
1207   memset (&bswap_stats, 0, sizeof (bswap_stats));
1208   calculate_dominance_info (CDI_DOMINATORS);
1209 
1210   FOR_EACH_BB_FN (bb, fun)
1211     {
1212       gimple_stmt_iterator gsi;
1213 
1214       /* We do a reverse scan for bswap patterns to make sure we get the
1215 	 widest match. As bswap pattern matching doesn't handle previously
1216 	 inserted smaller bswap replacements as sub-patterns, the wider
1217 	 variant wouldn't be detected.  */
1218       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1219 	{
1220 	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1221 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1222 	  enum tree_code code;
1223 	  struct symbolic_number n;
1224 	  bool bswap;
1225 
1226 	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1227 	     might be moved to a different basic block by bswap_replace and gsi
1228 	     must not point to it if that's the case.  Moving the gsi_prev
1229 	     there makes sure that gsi points to the statement previous to
1230 	     cur_stmt while still making sure that all statements are
1231 	     considered in this basic block.  */
1232 	  gsi_prev (&gsi);
1233 
1234 	  if (!is_gimple_assign (cur_stmt))
1235 	    continue;
1236 
1237 	  code = gimple_assign_rhs_code (cur_stmt);
1238 	  switch (code)
1239 	    {
1240 	    case LROTATE_EXPR:
1241 	    case RROTATE_EXPR:
1242 	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1243 		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1244 		     % BITS_PER_UNIT)
1245 		continue;
1246 	      /* Fall through.  */
1247 	    case BIT_IOR_EXPR:
1248 	      break;
1249 	    default:
1250 	      continue;
1251 	    }
1252 
1253 	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1254 
1255 	  if (!ins_stmt)
1256 	    continue;
1257 
1258 	  switch (n.range)
1259 	    {
1260 	    case 16:
1261 	      /* Already in canonical form, nothing to do.  */
1262 	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1263 		continue;
1264 	      load_type = bswap_type = uint16_type_node;
1265 	      break;
1266 	    case 32:
1267 	      load_type = uint32_type_node;
1268 	      if (bswap32_p)
1269 		{
1270 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1271 		  bswap_type = bswap32_type;
1272 		}
1273 	      break;
1274 	    case 64:
1275 	      load_type = uint64_type_node;
1276 	      if (bswap64_p)
1277 		{
1278 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1279 		  bswap_type = bswap64_type;
1280 		}
1281 	      break;
1282 	    default:
1283 	      continue;
1284 	    }
1285 
1286 	  if (bswap && !fndecl && n.range != 16)
1287 	    continue;
1288 
1289 	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1290 			     bswap_type, load_type, &n, bswap))
1291 	    changed = true;
1292 	}
1293     }
1294 
1295   statistics_counter_event (fun, "16-bit nop implementations found",
1296 			    nop_stats.found_16bit);
1297   statistics_counter_event (fun, "32-bit nop implementations found",
1298 			    nop_stats.found_32bit);
1299   statistics_counter_event (fun, "64-bit nop implementations found",
1300 			    nop_stats.found_64bit);
1301   statistics_counter_event (fun, "16-bit bswap implementations found",
1302 			    bswap_stats.found_16bit);
1303   statistics_counter_event (fun, "32-bit bswap implementations found",
1304 			    bswap_stats.found_32bit);
1305   statistics_counter_event (fun, "64-bit bswap implementations found",
1306 			    bswap_stats.found_64bit);
1307 
1308   return (changed ? TODO_update_ssa : 0);
1309 }
1310 
1311 } // anon namespace
1312 
1313 gimple_opt_pass *
1314 make_pass_optimize_bswap (gcc::context *ctxt)
1315 {
1316   return new pass_optimize_bswap (ctxt);
1317 }
1318 
1319 namespace {
1320 
1321 /* Struct recording one operand for the store, which is either a constant,
1322    then VAL represents the constant and all the other fields are zero, or
1323    a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1324    and the other fields also reflect the memory load, or an SSA name, then
1325    VAL represents the SSA name and all the other fields are zero.  */
1326 
1327 struct store_operand_info
1328 {
1329   tree val;
1330   tree base_addr;
1331   poly_uint64 bitsize;
1332   poly_uint64 bitpos;
1333   poly_uint64 bitregion_start;
1334   poly_uint64 bitregion_end;
1335   gimple *stmt;
1336   bool bit_not_p;
1337   store_operand_info ();
1338 };
1339 
1340 store_operand_info::store_operand_info ()
1341   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1342     bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1343 {
1344 }
1345 
1346 /* Struct recording the information about a single store of an immediate
1347    to memory.  These are created in the first phase and coalesced into
1348    merged_store_group objects in the second phase.  */
1349 
1350 struct store_immediate_info
1351 {
1352   unsigned HOST_WIDE_INT bitsize;
1353   unsigned HOST_WIDE_INT bitpos;
1354   unsigned HOST_WIDE_INT bitregion_start;
1355   /* This is one past the last bit of the bit region.  */
1356   unsigned HOST_WIDE_INT bitregion_end;
1357   gimple *stmt;
1358   unsigned int order;
1359   /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1360      BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1361      for bit insertion.
1362      LROTATE_EXPR if it can be only bswap optimized and
1363      ops are not really meaningful.
1364      NOP_EXPR if bswap optimization detected identity, ops
1365      are not meaningful.  */
1366   enum tree_code rhs_code;
1367   /* Two fields for bswap optimization purposes.  */
1368   struct symbolic_number n;
1369   gimple *ins_stmt;
1370   /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
1371   bool bit_not_p;
1372   /* True if ops have been swapped and thus ops[1] represents
1373      rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
1374   bool ops_swapped_p;
1375   /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
1376      just the first one.  */
1377   store_operand_info ops[2];
1378   store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1379 			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1380 			gimple *, unsigned int, enum tree_code,
1381 			struct symbolic_number &, gimple *, bool,
1382 			const store_operand_info &,
1383 			const store_operand_info &);
1384 };
1385 
1386 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1387 					    unsigned HOST_WIDE_INT bp,
1388 					    unsigned HOST_WIDE_INT brs,
1389 					    unsigned HOST_WIDE_INT bre,
1390 					    gimple *st,
1391 					    unsigned int ord,
1392 					    enum tree_code rhscode,
1393 					    struct symbolic_number &nr,
1394 					    gimple *ins_stmtp,
1395 					    bool bitnotp,
1396 					    const store_operand_info &op0r,
1397 					    const store_operand_info &op1r)
1398   : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1399     stmt (st), order (ord), rhs_code (rhscode), n (nr),
1400     ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false)
1401 #if __cplusplus >= 201103L
1402     , ops { op0r, op1r }
1403 {
1404 }
1405 #else
1406 {
1407   ops[0] = op0r;
1408   ops[1] = op1r;
1409 }
1410 #endif
1411 
1412 /* Struct representing a group of stores to contiguous memory locations.
1413    These are produced by the second phase (coalescing) and consumed in the
1414    third phase that outputs the widened stores.  */
1415 
1416 struct merged_store_group
1417 {
1418   unsigned HOST_WIDE_INT start;
1419   unsigned HOST_WIDE_INT width;
1420   unsigned HOST_WIDE_INT bitregion_start;
1421   unsigned HOST_WIDE_INT bitregion_end;
1422   /* The size of the allocated memory for val and mask.  */
1423   unsigned HOST_WIDE_INT buf_size;
1424   unsigned HOST_WIDE_INT align_base;
1425   poly_uint64 load_align_base[2];
1426 
1427   unsigned int align;
1428   unsigned int load_align[2];
1429   unsigned int first_order;
1430   unsigned int last_order;
1431   bool bit_insertion;
1432   bool only_constants;
1433   unsigned int first_nonmergeable_order;
1434 
1435   auto_vec<store_immediate_info *> stores;
1436   /* We record the first and last original statements in the sequence because
1437      we'll need their vuse/vdef and replacement position.  It's easier to keep
1438      track of them separately as 'stores' is reordered by apply_stores.  */
1439   gimple *last_stmt;
1440   gimple *first_stmt;
1441   unsigned char *val;
1442   unsigned char *mask;
1443 
1444   merged_store_group (store_immediate_info *);
1445   ~merged_store_group ();
1446   bool can_be_merged_into (store_immediate_info *);
1447   void merge_into (store_immediate_info *);
1448   void merge_overlapping (store_immediate_info *);
1449   bool apply_stores ();
1450 private:
1451   void do_merge (store_immediate_info *);
1452 };
1453 
1454 /* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
1455 
1456 static void
1457 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1458 {
1459   if (!fd)
1460     return;
1461 
1462   for (unsigned int i = 0; i < len; i++)
1463     fprintf (fd, "%02x ", ptr[i]);
1464   fprintf (fd, "\n");
1465 }
1466 
1467 /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
1468    bits between adjacent elements.  AMNT should be within
1469    [0, BITS_PER_UNIT).
1470    Example, AMNT = 2:
1471    00011111|11100000 << 2 = 01111111|10000000
1472    PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */
1473 
1474 static void
1475 shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
1476 {
1477   if (amnt == 0)
1478     return;
1479 
1480   unsigned char carry_over = 0U;
1481   unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
1482   unsigned char clear_mask = (~0U) << amnt;
1483 
1484   for (unsigned int i = 0; i < sz; i++)
1485     {
1486       unsigned prev_carry_over = carry_over;
1487       carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
1488 
1489       ptr[i] <<= amnt;
1490       if (i != 0)
1491 	{
1492 	  ptr[i] &= clear_mask;
1493 	  ptr[i] |= prev_carry_over;
1494 	}
1495     }
1496 }
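
/* For instance, with 8-bit bytes, shifting the two-byte buffer { 0xe0, 0x1f }
   (ptr[0] == 0xe0, ptr[1] == 0x1f) left by 2 with the routine above yields
   { 0x80, 0x7f }, matching the picture in its comment.  */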
1497 
1498 /* Like shift_bytes_in_array but for big-endian.
1499    Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
1500    bits between adjacent elements.  AMNT should be within
1501    [0, BITS_PER_UNIT).
1502    Example, AMNT = 2:
1503    00011111|11100000 >> 2 = 00000111|11111000
1504    PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */
1505 
1506 static void
1507 shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
1508 			    unsigned int amnt)
1509 {
1510   if (amnt == 0)
1511     return;
1512 
1513   unsigned char carry_over = 0U;
1514   unsigned char carry_mask = ~(~0U << amnt);
1515 
1516   for (unsigned int i = 0; i < sz; i++)
1517     {
1518       unsigned prev_carry_over = carry_over;
1519       carry_over = ptr[i] & carry_mask;
1520 
1521       carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
1522       ptr[i] >>= amnt;
1523       ptr[i] |= prev_carry_over;
1524     }
1525 }
1526 
1527 /* Clear out LEN bits starting from bit START in the byte array
1528    PTR.  This clears the bits to the *right* from START.
1529    START must be within [0, BITS_PER_UNIT) and counts starting from
1530    the least significant bit.  */
1531 
1532 static void
1533 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1534 		     unsigned int len)
1535 {
1536   if (len == 0)
1537     return;
1538   /* Clear len bits to the right of start.  */
1539   else if (len <= start + 1)
1540     {
1541       unsigned char mask = (~(~0U << len));
1542       mask = mask << (start + 1U - len);
1543       ptr[0] &= ~mask;
1544     }
1545   else if (start != BITS_PER_UNIT - 1)
1546     {
1547       clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1548       clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1549 			   len - (start % BITS_PER_UNIT) - 1);
1550     }
1551   else if (start == BITS_PER_UNIT - 1
1552 	   && len > BITS_PER_UNIT)
1553     {
1554       unsigned int nbytes = len / BITS_PER_UNIT;
1555       memset (ptr, 0, nbytes);
1556       if (len % BITS_PER_UNIT != 0)
1557 	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1558 			     len % BITS_PER_UNIT);
1559     }
1560   else
1561     gcc_unreachable ();
1562 }
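
/* For example, calling the above with START == 6 and LEN == 5 on a buffer
   whose first byte is 0xff clears bits 6 down to 2 of that byte, leaving
   0x83.  */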
1563 
1564 /* In the byte array PTR clear the bit region that starts at bit
1565    START and is LEN bits wide.
1566    For regions spanning multiple bytes do this recursively until we reach
1567    zero LEN or a region contained within a single byte.  */
1568 
1569 static void
1570 clear_bit_region (unsigned char *ptr, unsigned int start,
1571 		  unsigned int len)
1572 {
1573   /* Degenerate base case.  */
1574   if (len == 0)
1575     return;
1576   else if (start >= BITS_PER_UNIT)
1577     clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1578   /* Second base case.  */
1579   else if ((start + len) <= BITS_PER_UNIT)
1580     {
1581       unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1582       mask >>= BITS_PER_UNIT - (start + len);
1583 
1584       ptr[0] &= ~mask;
1585 
1586       return;
1587     }
1588   /* Clear most significant bits in a byte and proceed with the next byte.  */
1589   else if (start != 0)
1590     {
1591       clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1592       clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1593     }
1594   /* Whole bytes need to be cleared.  */
1595   else if (start == 0 && len > BITS_PER_UNIT)
1596     {
1597       unsigned int nbytes = len / BITS_PER_UNIT;
1598       /* We could recurse on each byte but we clear whole bytes, so a simple
1599 	 memset will do.  */
1600       memset (ptr, '\0', nbytes);
1601       /* Clear the remaining sub-byte region if there is one.  */
1602       if (len % BITS_PER_UNIT != 0)
1603 	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1604     }
1605   else
1606     gcc_unreachable ();
1607 }
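
/* As an illustration, with BITS_PER_UNIT == 8 and bit 0 being the least
   significant bit of a byte:

     unsigned char buf[2] = { 0xff, 0xff };
     clear_bit_region (buf, 3, 7);

   clears bits 3..7 of buf[0] and bits 0..1 of buf[1], giving
   { 0x07, 0xfc }, i.e. a contiguous 7-bit region starting at bit 3 of
   the buffer.  */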
1608 
1609 /* Write BITLEN bits of EXPR to the byte array PTR at
1610    bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
1611    Return true if the operation succeeded.  */
1612 
1613 static bool
1614 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1615 		       unsigned int total_bytes)
1616 {
1617   unsigned int first_byte = bitpos / BITS_PER_UNIT;
1618   tree tmp_int = expr;
1619   bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1620 			|| (bitpos % BITS_PER_UNIT)
1621 			|| !int_mode_for_size (bitlen, 0).exists ());
1622 
1623   if (!sub_byte_op_p)
1624     return native_encode_expr (tmp_int, ptr + first_byte, total_bytes) != 0;
1625 
1626   /* LITTLE-ENDIAN
1627      We are writing a non byte-sized quantity or at a position that is not
1628      at a byte boundary.
1629      |--------|--------|--------| ptr + first_byte
1630            ^              ^
1631            xxx xxxxxxxx xxx< bp>
1632            |______EXPR____|
1633 
1634      First native_encode_expr EXPR into a temporary buffer and shift each
1635      byte in the buffer by 'bp' (carrying the bits over as necessary).
1636      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1637                                               <------bitlen---->< bp>
1638     Then we clear the destination bits:
1639     |---00000|00000000|000-----| ptr + first_byte
1640         <-------bitlen--->< bp>
1641 
1642     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1643     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1644 
1645    BIG-ENDIAN
1646    We are writing a non byte-sized quantity or at a position that is not
1647    at a byte boundary.
1648      ptr + first_byte |--------|--------|--------|
1649                             ^              ^
1650                        <bp >xxx xxxxxxxx xxx
1651                             |_____EXPR_____|
1652 
1653      First native_encode_expr EXPR into a temporary buffer and shift each
1654      byte in the buffer to the right (carrying the bits over as necessary).
1655      We shift by as much as needed to align the most significant bit of EXPR
1656      with bitpos:
1657      |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1658         <---bitlen---->          <bp ><-----bitlen----->
1659     Then we clear the destination bits:
1660     ptr + first_byte |-----000||00000000||00000---|
1661                       <bp ><-------bitlen----->
1662 
1663     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1664     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1665     The awkwardness comes from the fact that bitpos is counted from the
1666     most significant bit of a byte.  */
1667 
1668   /* We must be dealing with fixed-size data at this point, since the
1669      total size is also fixed.  */
1670   fixed_size_mode mode = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1671   /* Allocate an extra byte so that we have space to shift into.  */
1672   unsigned int byte_size = GET_MODE_SIZE (mode) + 1;
1673   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1674   memset (tmpbuf, '\0', byte_size);
1675   /* The store detection code should only have allowed constants that are
1676      accepted by native_encode_expr.  */
1677   if (native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1678     gcc_unreachable ();
1679 
1680   /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1681      bytes to write.  This means it can write more than
1682      ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1683      write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
1684      bitlen and zero out the bits that are not relevant as well (that may
1685      contain a sign bit due to sign-extension).  */
1686   unsigned int padding
1687     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1688   /* On big-endian the padding is at the 'front' so just skip the initial
1689      bytes.  */
1690   if (BYTES_BIG_ENDIAN)
1691     tmpbuf += padding;
1692 
1693   byte_size -= padding;
1694 
1695   if (bitlen % BITS_PER_UNIT != 0)
1696     {
1697       if (BYTES_BIG_ENDIAN)
1698 	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1699 			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1700       else
1701 	clear_bit_region (tmpbuf, bitlen,
1702 			  byte_size * BITS_PER_UNIT - bitlen);
1703     }
1704   /* Left shifting relies on the last byte being clear if bitlen is
1705      a multiple of BITS_PER_UNIT, which might not be clear if
1706      there are padding bytes.  */
1707   else if (!BYTES_BIG_ENDIAN)
1708     tmpbuf[byte_size - 1] = '\0';
1709 
1710   /* Clear the bit region in PTR into which the bits from TMPBUF will be
1711      inserted.  */
1712   if (BYTES_BIG_ENDIAN)
1713     clear_bit_region_be (ptr + first_byte,
1714 			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1715   else
1716     clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1717 
1718   int shift_amnt;
1719   int bitlen_mod = bitlen % BITS_PER_UNIT;
1720   int bitpos_mod = bitpos % BITS_PER_UNIT;
1721 
1722   bool skip_byte = false;
1723   if (BYTES_BIG_ENDIAN)
1724     {
1725       /* BITPOS and BITLEN are exactly aligned and no shifting
1726 	 is necessary.  */
1727       if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1728 	  || (bitpos_mod == 0 && bitlen_mod == 0))
1729 	shift_amnt = 0;
1730       /* |. . . . . . . .|
1731 	  <bp >   <blen >.
1732 	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1733 	 of the value until it aligns with 'bp' in the next byte over.  */
1734       else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1735 	{
1736 	  shift_amnt = bitlen_mod + bitpos_mod;
1737 	  skip_byte = bitlen_mod != 0;
1738 	}
1739       /* |. . . . . . . .|
1740 	  <----bp--->
1741 	    <---blen---->.
1742 	 Shift the value right within the same byte so it aligns with 'bp'.  */
1743       else
1744 	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1745     }
1746   else
1747     shift_amnt = bitpos % BITS_PER_UNIT;
1748 
1749   /* Create the shifted version of EXPR.  */
1750   if (!BYTES_BIG_ENDIAN)
1751     {
1752       shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
1753       if (shift_amnt == 0)
1754 	byte_size--;
1755     }
1756   else
1757     {
1758       gcc_assert (BYTES_BIG_ENDIAN);
1759       shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1760       /* If shifting right forced us to move into the next byte skip the now
1761 	 empty byte.  */
1762       if (skip_byte)
1763 	{
1764 	  tmpbuf++;
1765 	  byte_size--;
1766 	}
1767     }
1768 
1769   /* Insert the bits from TMPBUF.  */
1770   for (unsigned int i = 0; i < byte_size; i++)
1771     ptr[first_byte + i] |= tmpbuf[i];
1772 
1773   return true;
1774 }
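
/* As an illustration of the sub-byte path above, assuming a little-endian
   target and a constant whose type has QImode: writing the value 22
   (0b10110) with BITLEN 5 at BITPOS 3 into an initially zeroed PTR
   proceeds as follows:
     - native_encode_expr fills tmpbuf with { 0x16, 0x00 },
     - the bits at and above BITLEN are cleared (already zero here),
     - bits 3..7 of PTR[0] are cleared (also already zero here),
     - shift_bytes_in_array shifts tmpbuf left by 3, giving { 0xb0, 0x00 },
     - ORing that in leaves PTR[0] == 0xb0, i.e. 22 << 3.  */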
1775 
1776 /* Sorting function for store_immediate_info objects.
1777    Sorts them by bitposition.  */
1778 
1779 static int
1780 sort_by_bitpos (const void *x, const void *y)
1781 {
1782   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1783   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1784 
1785   if ((*tmp)->bitpos < (*tmp2)->bitpos)
1786     return -1;
1787   else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1788     return 1;
1789   else
1790     /* If the bit positions are the same, fall back on the order field,
1791        which is guaranteed to be different.  */
1792     return (*tmp)->order - (*tmp2)->order;
1793 }
1794 
1795 /* Sorting function for store_immediate_info objects.
1796    Sorts them by the order field.  */
1797 
1798 static int
1799 sort_by_order (const void *x, const void *y)
1800 {
1801   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1802   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1803 
1804   if ((*tmp)->order < (*tmp2)->order)
1805     return -1;
1806   else if ((*tmp)->order > (*tmp2)->order)
1807     return 1;
1808 
1809   gcc_unreachable ();
1810 }
1811 
1812 /* Initialize a merged_store_group object from a store_immediate_info
1813    object.  */
1814 
1815 merged_store_group::merged_store_group (store_immediate_info *info)
1816 {
1817   start = info->bitpos;
1818   width = info->bitsize;
1819   bitregion_start = info->bitregion_start;
1820   bitregion_end = info->bitregion_end;
1821   /* VAL has memory allocated for it in apply_stores once the group
1822      width has been finalized.  */
1823   val = NULL;
1824   mask = NULL;
1825   bit_insertion = false;
1826   only_constants = info->rhs_code == INTEGER_CST;
1827   first_nonmergeable_order = ~0U;
1828   unsigned HOST_WIDE_INT align_bitpos = 0;
1829   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1830 			  &align, &align_bitpos);
1831   align_base = start - align_bitpos;
1832   for (int i = 0; i < 2; ++i)
1833     {
1834       store_operand_info &op = info->ops[i];
1835       if (op.base_addr == NULL_TREE)
1836 	{
1837 	  load_align[i] = 0;
1838 	  load_align_base[i] = 0;
1839 	}
1840       else
1841 	{
1842 	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1843 	  load_align_base[i] = op.bitpos - align_bitpos;
1844 	}
1845     }
1846   stores.create (1);
1847   stores.safe_push (info);
1848   last_stmt = info->stmt;
1849   last_order = info->order;
1850   first_stmt = last_stmt;
1851   first_order = last_order;
1852   buf_size = 0;
1853 }
1854 
1855 merged_store_group::~merged_store_group ()
1856 {
1857   if (val)
1858     XDELETEVEC (val);
1859 }
1860 
1861 /* Return true if the store described by INFO can be merged into the group.  */
1862 
1863 bool
1864 merged_store_group::can_be_merged_into (store_immediate_info *info)
1865 {
1866   /* Do not merge bswap patterns.  */
1867   if (info->rhs_code == LROTATE_EXPR)
1868     return false;
1869 
1870   /* The canonical case.  */
1871   if (info->rhs_code == stores[0]->rhs_code)
1872     return true;
1873 
1874   /* BIT_INSERT_EXPR is compatible with INTEGER_CST.  */
1875   if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1876     return true;
1877 
1878   if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1879     return true;
1880 
1881   /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
1882   if (info->rhs_code == MEM_REF
1883       && (stores[0]->rhs_code == INTEGER_CST
1884 	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
1885       && info->bitregion_start == stores[0]->bitregion_start
1886       && info->bitregion_end == stores[0]->bitregion_end)
1887     return true;
1888 
1889   if (stores[0]->rhs_code == MEM_REF
1890       && (info->rhs_code == INTEGER_CST
1891 	  || info->rhs_code == BIT_INSERT_EXPR)
1892       && info->bitregion_start == stores[0]->bitregion_start
1893       && info->bitregion_end == stores[0]->bitregion_end)
1894     return true;
1895 
1896   return false;
1897 }
1898 
1899 /* Helper method for merge_into and merge_overlapping to do
1900    the common part.  */
1901 
1902 void
1903 merged_store_group::do_merge (store_immediate_info *info)
1904 {
1905   bitregion_start = MIN (bitregion_start, info->bitregion_start);
1906   bitregion_end = MAX (bitregion_end, info->bitregion_end);
1907 
1908   unsigned int this_align;
1909   unsigned HOST_WIDE_INT align_bitpos = 0;
1910   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1911 			  &this_align, &align_bitpos);
1912   if (this_align > align)
1913     {
1914       align = this_align;
1915       align_base = info->bitpos - align_bitpos;
1916     }
1917   for (int i = 0; i < 2; ++i)
1918     {
1919       store_operand_info &op = info->ops[i];
1920       if (!op.base_addr)
1921 	continue;
1922 
1923       get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1924       if (this_align > load_align[i])
1925 	{
1926 	  load_align[i] = this_align;
1927 	  load_align_base[i] = op.bitpos - align_bitpos;
1928 	}
1929     }
1930 
1931   gimple *stmt = info->stmt;
1932   stores.safe_push (info);
1933   if (info->order > last_order)
1934     {
1935       last_order = info->order;
1936       last_stmt = stmt;
1937     }
1938   else if (info->order < first_order)
1939     {
1940       first_order = info->order;
1941       first_stmt = stmt;
1942     }
1943   if (info->rhs_code != INTEGER_CST)
1944     only_constants = false;
1945 }
1946 
1947 /* Merge a store recorded by INFO into this merged store.
1948    The store does not overlap with the existing recorded
1949    stores.  */
1950 
1951 void
1952 merged_store_group::merge_into (store_immediate_info *info)
1953 {
1954   /* Make sure we're inserting in the position we think we're inserting.  */
1955   gcc_assert (info->bitpos >= start + width
1956 	      && info->bitregion_start <= bitregion_end);
1957 
1958   width = info->bitpos + info->bitsize - start;
1959   do_merge (info);
1960 }
1961 
1962 /* Merge a store described by INFO into this merged store.
1963    INFO overlaps in some way with the current store (i.e. it's not the
1964    contiguous case, which is handled by merged_store_group::merge_into).  */
1965 
1966 void
1967 merged_store_group::merge_overlapping (store_immediate_info *info)
1968 {
1969   /* If the store extends the size of the group, extend the width.  */
1970   if (info->bitpos + info->bitsize > start + width)
1971     width = info->bitpos + info->bitsize - start;
1972 
1973   do_merge (info);
1974 }
1975 
1976 /* Go through all the recorded stores in this group in program order and
1977    apply their values to the VAL byte array to create the final merged
1978    value.  Return true if the operation succeeded.  */
1979 
1980 bool
1981 merged_store_group::apply_stores ()
1982 {
1983   /* Make sure we have more than one store in the group, otherwise we cannot
1984      merge anything.  */
1985   if (bitregion_start % BITS_PER_UNIT != 0
1986       || bitregion_end % BITS_PER_UNIT != 0
1987       || stores.length () == 1)
1988     return false;
1989 
1990   stores.qsort (sort_by_order);
1991   store_immediate_info *info;
1992   unsigned int i;
1993   /* Create a power-of-2-sized buffer for native_encode_expr.  */
1994   buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1995   val = XNEWVEC (unsigned char, 2 * buf_size);
1996   mask = val + buf_size;
1997   memset (val, 0, buf_size);
1998   memset (mask, ~0U, buf_size);
1999 
2000   FOR_EACH_VEC_ELT (stores, i, info)
2001     {
2002       unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2003       tree cst;
2004       if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2005 	cst = info->ops[0].val;
2006       else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2007 	cst = info->ops[1].val;
2008       else
2009 	cst = NULL_TREE;
2010       bool ret = true;
2011       if (cst)
2012 	{
2013 	  if (info->rhs_code == BIT_INSERT_EXPR)
2014 	    bit_insertion = true;
2015 	  else
2016 	    ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2017 					 pos_in_buffer, buf_size);
2018 	}
2019       unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2020       if (BYTES_BIG_ENDIAN)
2021 	clear_bit_region_be (m, (BITS_PER_UNIT - 1
2022 				 - (pos_in_buffer % BITS_PER_UNIT)),
2023 			     info->bitsize);
2024       else
2025 	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2026       if (cst && dump_file && (dump_flags & TDF_DETAILS))
2027 	{
2028 	  if (ret)
2029 	    {
2030 	      fputs ("After writing ", dump_file);
2031 	      print_generic_expr (dump_file, cst, TDF_NONE);
2032 	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2033 		       " at position %d\n", info->bitsize, pos_in_buffer);
2034 	      fputs ("  the merged value contains ", dump_file);
2035 	      dump_char_array (dump_file, val, buf_size);
2036 	      fputs ("  the merged mask contains  ", dump_file);
2037 	      dump_char_array (dump_file, mask, buf_size);
2038 	      if (bit_insertion)
2039 		fputs ("  bit insertion is required\n", dump_file);
2040 	    }
2041 	  else
2042 	    fprintf (dump_file, "Failed to merge stores\n");
2043 	}
2044       if (!ret)
2045 	return false;
2046     }
2047   stores.qsort (sort_by_bitpos);
2048   return true;
2049 }
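
/* As an illustration: for a group made of the two constant stores

     MEM[(char *)p_1] = 17;       (bitpos 0, bitsize 8)
     MEM[(char *)p_1 + 1B] = 34;  (bitpos 8, bitsize 8)

   with a 16-bit bit region, buf_size becomes 2, val ends up as
   { 0x11, 0x22 } and mask as { 0x00, 0x00 }; mask keeps 1 bits only at
   positions that no recorded store has written.  */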
2050 
2051 /* Structure describing the store chain.  */
2052 
2053 struct imm_store_chain_info
2054 {
2055   /* Doubly-linked list that imposes an order on chain processing.
2056      PNXP (prev's next pointer) points to the head of a list, or to
2057      the next field in the previous chain in the list.
2058      See pass_store_merging::m_stores_head for more rationale.  */
2059   imm_store_chain_info *next, **pnxp;
2060   tree base_addr;
2061   auto_vec<store_immediate_info *> m_store_info;
2062   auto_vec<merged_store_group *> m_merged_store_groups;
2063 
2064   imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2065   : next (inspt), pnxp (&inspt), base_addr (b_a)
2066   {
2067     inspt = this;
2068     if (next)
2069       {
2070 	gcc_checking_assert (pnxp == next->pnxp);
2071 	next->pnxp = &next;
2072       }
2073   }
2074   ~imm_store_chain_info ()
2075   {
2076     *pnxp = next;
2077     if (next)
2078       {
2079 	gcc_checking_assert (&next == next->pnxp);
2080 	next->pnxp = pnxp;
2081       }
2082   }
2083   bool terminate_and_process_chain ();
2084   bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2085 			   unsigned int);
2086   bool coalesce_immediate_stores ();
2087   bool output_merged_store (merged_store_group *);
2088   bool output_merged_stores ();
2089 };
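
/* As an illustration of the PNXP links: with two chains A and B created
   in that order, the constructor pushes new chains at the head, so the
   list is m_stores_head -> B -> A with B->pnxp == &m_stores_head and
   A->pnxp == &B->next.  Destroying B then executes *B->pnxp = B->next
   (making m_stores_head point to A) and A->pnxp = B->pnxp, restoring the
   invariant that each pnxp points at whatever pointer currently points
   to that chain.  */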
2090 
2091 const pass_data pass_data_tree_store_merging = {
2092   GIMPLE_PASS,     /* type */
2093   "store-merging", /* name */
2094   OPTGROUP_NONE,   /* optinfo_flags */
2095   TV_GIMPLE_STORE_MERGING,	 /* tv_id */
2096   PROP_ssa,	/* properties_required */
2097   0,		   /* properties_provided */
2098   0,		   /* properties_destroyed */
2099   0,		   /* todo_flags_start */
2100   TODO_update_ssa, /* todo_flags_finish */
2101 };
2102 
2103 class pass_store_merging : public gimple_opt_pass
2104 {
2105 public:
2106   pass_store_merging (gcc::context *ctxt)
2107     : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2108   {
2109   }
2110 
2111   /* Pass not supported for PDP-endian, nor for insane hosts or
2112      target character sizes where native_{encode,interpret}_expr
2113      doesn't work properly.  */
2114   virtual bool
2115   gate (function *)
2116   {
2117     return flag_store_merging
2118 	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2119 	   && CHAR_BIT == 8
2120 	   && BITS_PER_UNIT == 8;
2121   }
2122 
2123   virtual unsigned int execute (function *);
2124 
2125 private:
2126   hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
2127 
2128   /* Form a doubly-linked stack of the elements of m_stores, so that
2129      we can iterate over them in a predictable way.  Using this order
2130      avoids extraneous differences in the compiler output just because
2131      of tree pointer variations (e.g. different chains end up in
2132      different positions of m_stores, so they are handled in different
2133      orders, so they allocate or release SSA names in different
2134      orders, and when they get reused, subsequent passes end up
2135      getting different SSA names, which may ultimately change
2136      decisions when going out of SSA).  */
2137   imm_store_chain_info *m_stores_head;
2138 
2139   void process_store (gimple *);
2140   bool terminate_and_process_all_chains ();
2141   bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2142   bool terminate_and_release_chain (imm_store_chain_info *);
2143 }; // class pass_store_merging
2144 
2145 /* Terminate and process all recorded chains.  Return true if any changes
2146    were made.  */
2147 
2148 bool
2149 pass_store_merging::terminate_and_process_all_chains ()
2150 {
2151   bool ret = false;
2152   while (m_stores_head)
2153     ret |= terminate_and_release_chain (m_stores_head);
2154   gcc_assert (m_stores.elements () == 0);
2155   gcc_assert (m_stores_head == NULL);
2156 
2157   return ret;
2158 }
2159 
2160 /* Terminate all chains that are affected by the statement STMT.
2161    CHAIN_INFO is the chain we should ignore from the checks if
2162    non-NULL.  */
2163 
2164 bool
2165 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2166 						     **chain_info,
2167 						   gimple *stmt)
2168 {
2169   bool ret = false;
2170 
2171   /* If the statement doesn't touch memory it can't alias.  */
2172   if (!gimple_vuse (stmt))
2173     return false;
2174 
2175   tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2176   for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2177     {
2178       next = cur->next;
2179 
2180       /* We already checked all the stores in chain_info and terminated the
2181 	 chain if necessary.  Skip it here.  */
2182       if (chain_info && *chain_info == cur)
2183 	continue;
2184 
2185       store_immediate_info *info;
2186       unsigned int i;
2187       FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2188 	{
2189 	  tree lhs = gimple_assign_lhs (info->stmt);
2190 	  if (ref_maybe_used_by_stmt_p (stmt, lhs)
2191 	      || stmt_may_clobber_ref_p (stmt, lhs)
2192 	      || (store_lhs && refs_output_dependent_p (store_lhs, lhs)))
2193 	    {
2194 	      if (dump_file && (dump_flags & TDF_DETAILS))
2195 		{
2196 		  fprintf (dump_file, "stmt causes chain termination:\n");
2197 		  print_gimple_stmt (dump_file, stmt, 0);
2198 		}
2199 	      terminate_and_release_chain (cur);
2200 	      ret = true;
2201 	      break;
2202 	    }
2203 	}
2204     }
2205 
2206   return ret;
2207 }
2208 
2209 /* Helper function.  Terminate the recorded chain storing to base object
2210    BASE.  Return true if the merging and output was successful.  The m_stores
2211    entry is removed after the processing in any case.  */
2212 
2213 bool
2214 pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
2215 {
2216   bool ret = chain_info->terminate_and_process_chain ();
2217   m_stores.remove (chain_info->base_addr);
2218   delete chain_info;
2219   return ret;
2220 }
2221 
2222 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2223    may clobber REF.  FIRST and LAST must be in the same basic block and
2224    have non-NULL vdef.  We want to be able to sink load of REF across
2225    stores between FIRST and LAST, up to right before LAST.  */
2226 
2227 bool
2228 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2229 {
2230   ao_ref r;
2231   ao_ref_init (&r, ref);
2232   unsigned int count = 0;
2233   tree vop = gimple_vdef (last);
2234   gimple *stmt;
2235 
2236   gcc_checking_assert (gimple_bb (first) == gimple_bb (last));
2237   do
2238     {
2239       stmt = SSA_NAME_DEF_STMT (vop);
2240       if (stmt_may_clobber_ref_p_1 (stmt, &r))
2241 	return true;
2242       if (gimple_store_p (stmt)
2243 	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2244 	return true;
2245       /* Avoid quadratic compile time by bounding the number of checks
2246 	 we perform.  */
2247       if (++count > MAX_STORE_ALIAS_CHECKS)
2248 	return true;
2249       vop = gimple_vuse (stmt);
2250     }
2251   while (stmt != first);
2252   return false;
2253 }
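
/* As an illustration of the walk above: for a block containing

     s1: t.a = _1;   VDEF .MEM_10
     s2: u.x = _2;   VDEF .MEM_11  VUSE .MEM_10
     s3: t.b = _3;   VDEF .MEM_12  VUSE .MEM_11

   stmts_may_clobber_ref_p (s1, s3, ref) starts from the VDEF of s3 and
   follows the virtual use links backwards, testing s3, s2 and finally s1
   against REF, and gives up (conservatively returning true) once
   MAX_STORE_ALIAS_CHECKS statements have been inspected.  */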
2254 
2255 /* Return true if INFO->ops[IDX] is mergeable with the
2256    corresponding loads already in MERGED_STORE group.
2257    BASE_ADDR is the base address of the whole store group.  */
2258 
2259 bool
2260 compatible_load_p (merged_store_group *merged_store,
2261 		   store_immediate_info *info,
2262 		   tree base_addr, int idx)
2263 {
2264   store_immediate_info *infof = merged_store->stores[0];
2265   if (!info->ops[idx].base_addr
2266       || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2267 		   info->bitpos - infof->bitpos)
2268       || !operand_equal_p (info->ops[idx].base_addr,
2269 			   infof->ops[idx].base_addr, 0))
2270     return false;
2271 
2272   store_immediate_info *infol = merged_store->stores.last ();
2273   tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2274   /* In this case all vuses should be the same, e.g.
2275      _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2276      or
2277      _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2278      and we can emit the coalesced load next to any of those loads.  */
2279   if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2280       && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2281     return true;
2282 
2283   /* Otherwise, at least for now require that the load has the same
2284      vuse as the store.  See following examples.  */
2285   if (gimple_vuse (info->stmt) != load_vuse)
2286     return false;
2287 
2288   if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2289       || (infof != infol
2290 	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2291     return false;
2292 
2293   /* If the load is from the same location as the store, already
2294      the construction of the immediate chain info guarantees no intervening
2295      stores, so no further checks are needed.  Example:
2296      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
2297   if (known_eq (info->ops[idx].bitpos, info->bitpos)
2298       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2299     return true;
2300 
2301   /* Otherwise, we need to punt if any of the loads can be clobbered by any
2302      of the stores in the group, or any other stores in between those.
2303      Previous calls to compatible_load_p ensured that for all the
2304      merged_store->stores IDX loads, no stmts starting with
2305      merged_store->first_stmt and ending right before merged_store->last_stmt
2306      clobbers those loads.  */
2307   gimple *first = merged_store->first_stmt;
2308   gimple *last = merged_store->last_stmt;
2309   unsigned int i;
2310   store_immediate_info *infoc;
2311   /* The stores are sorted by increasing store bitpos, so if info->stmt store
2312      comes before the so far first load, we'll be changing
2313      merged_store->first_stmt.  In that case we need to give up if
2314      any of the earlier processed loads are clobbered by the stmts in the
2315      new range.  */
2316   if (info->order < merged_store->first_order)
2317     {
2318       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2319 	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2320 	  return false;
2321       first = info->stmt;
2322     }
2323   /* Similarly, we could change merged_store->last_stmt, so ensure
2324      in that case no stmts in the new range clobber any of the earlier
2325      processed loads.  */
2326   else if (info->order > merged_store->last_order)
2327     {
2328       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2329 	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2330 	  return false;
2331       last = info->stmt;
2332     }
2333   /* And finally, we'd be adding a new load to the set, ensure it isn't
2334      clobbered in the new range.  */
2335   if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2336     return false;
2337 
2338   /* Otherwise, we are looking for:
2339      _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2340      or
2341      _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
2342   return true;
2343 }
2344 
2345 /* Add all refs loaded to compute VAL to REFS vector.  */
2346 
2347 void
2348 gather_bswap_load_refs (vec<tree> *refs, tree val)
2349 {
2350   if (TREE_CODE (val) != SSA_NAME)
2351     return;
2352 
2353   gimple *stmt = SSA_NAME_DEF_STMT (val);
2354   if (!is_gimple_assign (stmt))
2355     return;
2356 
2357   if (gimple_assign_load_p (stmt))
2358     {
2359       refs->safe_push (gimple_assign_rhs1 (stmt));
2360       return;
2361     }
2362 
2363   switch (gimple_assign_rhs_class (stmt))
2364     {
2365     case GIMPLE_BINARY_RHS:
2366       gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2367       /* FALLTHRU */
2368     case GIMPLE_UNARY_RHS:
2369       gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2370       break;
2371     default:
2372       gcc_unreachable ();
2373     }
2374 }
2375 
2376 /* Check if there are any stores in M_STORE_INFO after index I
2377    (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2378    a potential group ending with END that have their order
2379    smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
2380    all the stores already merged and the one under consideration
2381    have rhs_code of INTEGER_CST.  Return true if there are no such stores.
2382    Consider:
2383      MEM[(long long int *)p_28] = 0;
2384      MEM[(long long int *)p_28 + 8B] = 0;
2385      MEM[(long long int *)p_28 + 16B] = 0;
2386      MEM[(long long int *)p_28 + 24B] = 0;
2387      _129 = (int) _130;
2388      MEM[(int *)p_28 + 8B] = _129;
2389      MEM[(int *)p_28].a = -1;
2390    We already have
2391      MEM[(long long int *)p_28] = 0;
2392      MEM[(int *)p_28].a = -1;
2393    stmts in the current group and need to consider if it is safe to
2394    add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2395    There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2396    store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2397    into the group and merging of those 3 stores is successful, merged
2398    stmts will be emitted at the latest store from that group, i.e.
2399    LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2400    The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2401    the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2402    so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2403    into the group.  That way it will be its own store group and will
2404    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
2405    INTEGER_CST stores, those are mergeable using merge_overlapping,
2406    so don't return false for those.
2407 
2408    Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2409    (exclusive), whether they don't overlap the bitrange START to END
2410    and have order in between FIRST_ORDER and LAST_ORDER.  This is to
2411    prevent merging in cases like:
2412      MEM <char[12]> [&b + 8B] = {};
2413      MEM[(short *) &b] = 5;
2414      _5 = *x_4(D);
2415      MEM <long long unsigned int> [&b + 2B] = _5;
2416      MEM[(char *)&b + 16B] = 88;
2417      MEM[(int *)&b + 20B] = 1;
2418    The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2419    be merged with it, because the = _5 store overlaps these and is in between
2420    them in sort_by_order ordering.  If it was merged, the merged store would
2421    go after the = _5 store and thus change behavior.  */
2422 
2423 static bool
2424 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2425 		  bool all_integer_cst_p, unsigned int first_order,
2426 		  unsigned int last_order, unsigned HOST_WIDE_INT start,
2427 		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2428 		  unsigned end_earlier)
2429 {
2430   unsigned int len = m_store_info.length ();
2431   for (unsigned int j = first_earlier; j < end_earlier; j++)
2432     {
2433       store_immediate_info *info = m_store_info[j];
2434       if (info->order > first_order
2435 	  && info->order < last_order
2436 	  && info->bitpos + info->bitsize > start)
2437 	return false;
2438     }
2439   for (++i; i < len; ++i)
2440     {
2441       store_immediate_info *info = m_store_info[i];
2442       if (info->bitpos >= end)
2443 	break;
2444       if (info->order < last_order
2445 	  && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2446 	return false;
2447     }
2448   return true;
2449 }
2450 
2451 /* Return true if m_store_info[first] and at least one following store
2452    together form a group that stores a try_size-bit value which is either
2453    a byte-swapped copy of a memory load or of some other value, or an
2454    identity copy of some value.  This uses the bswap pass APIs.  */
2455 
2456 bool
2457 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2458 					  unsigned int first,
2459 					  unsigned int try_size,
2460 					  unsigned int first_earlier)
2461 {
2462   unsigned int len = m_store_info.length (), last = first;
2463   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2464   if (width >= try_size)
2465     return false;
2466   for (unsigned int i = first + 1; i < len; ++i)
2467     {
2468       if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2469 	  || m_store_info[i]->ins_stmt == NULL)
2470 	return false;
2471       width += m_store_info[i]->bitsize;
2472       if (width >= try_size)
2473 	{
2474 	  last = i;
2475 	  break;
2476 	}
2477     }
2478   if (width != try_size)
2479     return false;
2480 
2481   bool allow_unaligned
2482     = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
2483   /* Punt if the combined store would not be aligned and we need alignment.  */
2484   if (!allow_unaligned)
2485     {
2486       unsigned int align = merged_store->align;
2487       unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2488       for (unsigned int i = first + 1; i <= last; ++i)
2489 	{
2490 	  unsigned int this_align;
2491 	  unsigned HOST_WIDE_INT align_bitpos = 0;
2492 	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2493 				  &this_align, &align_bitpos);
2494 	  if (this_align > align)
2495 	    {
2496 	      align = this_align;
2497 	      align_base = m_store_info[i]->bitpos - align_bitpos;
2498 	    }
2499 	}
2500       unsigned HOST_WIDE_INT align_bitpos
2501 	= (m_store_info[first]->bitpos - align_base) & (align - 1);
2502       if (align_bitpos)
2503 	align = least_bit_hwi (align_bitpos);
2504       if (align < try_size)
2505 	return false;
2506     }
2507 
2508   tree type;
2509   switch (try_size)
2510     {
2511     case 16: type = uint16_type_node; break;
2512     case 32: type = uint32_type_node; break;
2513     case 64: type = uint64_type_node; break;
2514     default: gcc_unreachable ();
2515     }
2516   struct symbolic_number n;
2517   gimple *ins_stmt = NULL;
2518   int vuse_store = -1;
2519   unsigned int first_order = merged_store->first_order;
2520   unsigned int last_order = merged_store->last_order;
2521   gimple *first_stmt = merged_store->first_stmt;
2522   gimple *last_stmt = merged_store->last_stmt;
2523   unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2524   store_immediate_info *infof = m_store_info[first];
2525 
2526   for (unsigned int i = first; i <= last; ++i)
2527     {
2528       store_immediate_info *info = m_store_info[i];
2529       struct symbolic_number this_n = info->n;
2530       this_n.type = type;
2531       if (!this_n.base_addr)
2532 	this_n.range = try_size / BITS_PER_UNIT;
2533       else
2534 	/* Update vuse in case it has changed by output_merged_stores.  */
2535 	this_n.vuse = gimple_vuse (info->ins_stmt);
2536       unsigned int bitpos = info->bitpos - infof->bitpos;
2537       if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2538 			    BYTES_BIG_ENDIAN
2539 			    ? try_size - info->bitsize - bitpos
2540 			    : bitpos))
2541 	return false;
2542       if (this_n.base_addr && vuse_store)
2543 	{
2544 	  unsigned int j;
2545 	  for (j = first; j <= last; ++j)
2546 	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2547 	      break;
2548 	  if (j > last)
2549 	    {
2550 	      if (vuse_store == 1)
2551 		return false;
2552 	      vuse_store = 0;
2553 	    }
2554 	}
2555       if (i == first)
2556 	{
2557 	  n = this_n;
2558 	  ins_stmt = info->ins_stmt;
2559 	}
2560       else
2561 	{
2562 	  if (n.base_addr && n.vuse != this_n.vuse)
2563 	    {
2564 	      if (vuse_store == 0)
2565 		return false;
2566 	      vuse_store = 1;
2567 	    }
2568 	  if (info->order > last_order)
2569 	    {
2570 	      last_order = info->order;
2571 	      last_stmt = info->stmt;
2572 	    }
2573 	  else if (info->order < first_order)
2574 	    {
2575 	      first_order = info->order;
2576 	      first_stmt = info->stmt;
2577 	    }
2578 	  end = MAX (end, info->bitpos + info->bitsize);
2579 
2580 	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2581 					     &this_n, &n);
2582 	  if (ins_stmt == NULL)
2583 	    return false;
2584 	}
2585     }
2586 
2587   uint64_t cmpxchg, cmpnop;
2588   find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2589 
2590   /* A complete byte swap should make the symbolic number start with
2591      the largest digit in the highest order byte.  An unchanged symbolic
2592      number indicates a read with the same endianness as the target.  */
2593   if (n.n != cmpnop && n.n != cmpxchg)
2594     return false;
2595 
2596   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2597     return false;
2598 
2599   if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2600 			 merged_store->start, end, first_earlier, first))
2601     return false;
2602 
2603   /* Don't handle memory copy this way if normal non-bswap processing
2604      would handle it too.  */
2605   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2606     {
2607       unsigned int i;
2608       for (i = first; i <= last; ++i)
2609 	if (m_store_info[i]->rhs_code != MEM_REF)
2610 	  break;
2611       if (i == last + 1)
2612 	return false;
2613     }
2614 
2615   if (n.n == cmpxchg)
2616     switch (try_size)
2617       {
2618       case 16:
2619 	/* Will emit LROTATE_EXPR.  */
2620 	break;
2621       case 32:
2622 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2623 	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2624 	  break;
2625 	return false;
2626       case 64:
2627 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2628 	    && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2629 	  break;
2630 	return false;
2631       default:
2632 	gcc_unreachable ();
2633       }
2634 
2635   if (!allow_unaligned && n.base_addr)
2636     {
2637       unsigned int align = get_object_alignment (n.src);
2638       if (align < try_size)
2639 	return false;
2640     }
2641 
2642   /* If each load has vuse of the corresponding store, need to verify
2643      the loads can be sunk right before the last store.  */
2644   if (vuse_store == 1)
2645     {
2646       auto_vec<tree, 64> refs;
2647       for (unsigned int i = first; i <= last; ++i)
2648 	gather_bswap_load_refs (&refs,
2649 				gimple_assign_rhs1 (m_store_info[i]->stmt));
2650 
2651       unsigned int i;
2652       tree ref;
2653       FOR_EACH_VEC_ELT (refs, i, ref)
2654 	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2655 	  return false;
2656       n.vuse = NULL_TREE;
2657     }
2658 
2659   infof->n = n;
2660   infof->ins_stmt = ins_stmt;
2661   for (unsigned int i = first; i <= last; ++i)
2662     {
2663       m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2664       m_store_info[i]->ops[0].base_addr = NULL_TREE;
2665       m_store_info[i]->ops[1].base_addr = NULL_TREE;
2666       if (i != first)
2667 	merged_store->merge_into (m_store_info[i]);
2668     }
2669 
2670   return true;
2671 }
2672 
2673 /* Go through the candidate stores recorded in m_store_info and merge them
2674    into merged_store_group objects recorded into m_merged_store_groups
2675    representing the widened stores.  Return true if coalescing was successful
2676    and the number of widened stores is fewer than the original number
2677    of stores.  */
2678 
2679 bool
2680 imm_store_chain_info::coalesce_immediate_stores ()
2681 {
2682   /* Anything less can't be processed.  */
2683   if (m_store_info.length () < 2)
2684     return false;
2685 
2686   if (dump_file && (dump_flags & TDF_DETAILS))
2687     fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2688 	     m_store_info.length ());
2689 
2690   store_immediate_info *info;
2691   unsigned int i, ignore = 0;
2692   unsigned int first_earlier = 0;
2693   unsigned int end_earlier = 0;
2694 
2695   /* Order the stores by the bitposition they write to.  */
2696   m_store_info.qsort (sort_by_bitpos);
2697 
2698   info = m_store_info[0];
2699   merged_store_group *merged_store = new merged_store_group (info);
2700   if (dump_file && (dump_flags & TDF_DETAILS))
2701     fputs ("New store group\n", dump_file);
2702 
2703   FOR_EACH_VEC_ELT (m_store_info, i, info)
2704     {
2705       if (i <= ignore)
2706 	goto done;
2707 
2708       while (first_earlier < end_earlier
2709 	     && (m_store_info[first_earlier]->bitpos
2710 		 + m_store_info[first_earlier]->bitsize
2711 		 <= merged_store->start))
2712 	first_earlier++;
2713 
2714       /* First try to handle group of stores like:
2715 	 p[0] = data >> 24;
2716 	 p[1] = data >> 16;
2717 	 p[2] = data >> 8;
2718 	 p[3] = data;
2719 	 using the bswap framework.  */
2720       if (info->bitpos == merged_store->start + merged_store->width
2721 	  && merged_store->stores.length () == 1
2722 	  && merged_store->stores[0]->ins_stmt != NULL
2723 	  && info->ins_stmt != NULL)
2724 	{
2725 	  unsigned int try_size;
2726 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
2727 	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
2728 				    first_earlier))
2729 	      break;
2730 
2731 	  if (try_size >= 16)
2732 	    {
2733 	      ignore = i + merged_store->stores.length () - 1;
2734 	      m_merged_store_groups.safe_push (merged_store);
2735 	      if (ignore < m_store_info.length ())
2736 		{
2737 		  merged_store = new merged_store_group (m_store_info[ignore]);
2738 		  end_earlier = ignore;
2739 		}
2740 	      else
2741 		merged_store = NULL;
2742 	      goto done;
2743 	    }
2744 	}
2745 
2746       if (info->order >= merged_store->first_nonmergeable_order)
2747 	;
2748 
2749       /* |---store 1---|
2750 	       |---store 2---|
2751 	 Overlapping stores.  */
2752       else if (IN_RANGE (info->bitpos, merged_store->start,
2753 			 merged_store->start + merged_store->width - 1)
2754 	       /* |---store 1---||---store 2---|
2755 		  Handle also the consecutive INTEGER_CST stores case here,
2756 		  as we have here the code to deal with overlaps.  */
2757 	       || (info->bitregion_start <= merged_store->bitregion_end
2758 		   && info->rhs_code == INTEGER_CST
2759 		   && merged_store->only_constants
2760 		   && merged_store->can_be_merged_into (info)))
2761 	{
2762 	  /* Only allow overlapping stores of constants.  */
2763 	  if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
2764 	    {
2765 	      unsigned int first_order
2766 		= MIN (merged_store->first_order, info->order);
2767 	      unsigned int last_order
2768 		= MAX (merged_store->last_order, info->order);
2769 	      unsigned HOST_WIDE_INT end
2770 		= MAX (merged_store->start + merged_store->width,
2771 		       info->bitpos + info->bitsize);
2772 	      if (check_no_overlap (m_store_info, i, true, first_order,
2773 				    last_order, merged_store->start, end,
2774 				    first_earlier, end_earlier))
2775 		{
2776 		  /* check_no_overlap call above made sure there are no
2777 		     overlapping stores with non-INTEGER_CST rhs_code
2778 		     in between the first and last of the stores we've
2779 		     just merged.  If there are any INTEGER_CST rhs_code
2780 		     stores in between, we need to merge_overlapping them
2781 		     even if in the sort_by_bitpos order there are other
2782 		     overlapping stores in between.  Keep those stores as is.
2783 		     Example:
2784 			MEM[(int *)p_28] = 0;
2785 			MEM[(char *)p_28 + 3B] = 1;
2786 			MEM[(char *)p_28 + 1B] = 2;
2787 			MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2788 		     We can't merge the zero store with the store of two and
2789 		     not merge anything else, because the store of one is
2790 		     in the original order in between those two, but in
2791 		     sort_by_bitpos order it comes after the last store that
2792 		     we can't merge with them.  We can merge the first 3 stores
2793 		     and keep the last store as is though.  */
2794 		  unsigned int len = m_store_info.length ();
2795 		  unsigned int try_order = last_order;
2796 		  unsigned int first_nonmergeable_order;
2797 		  unsigned int k;
2798 		  bool last_iter = false;
2799 		  int attempts = 0;
2800 		  do
2801 		    {
2802 		      unsigned int max_order = 0;
2803 		      unsigned int min_order = first_order;
2804 		      unsigned first_nonmergeable_int_order = ~0U;
2805 		      unsigned HOST_WIDE_INT this_end = end;
2806 		      k = i;
2807 		      first_nonmergeable_order = ~0U;
2808 		      for (unsigned int j = i + 1; j < len; ++j)
2809 			{
2810 			  store_immediate_info *info2 = m_store_info[j];
2811 			  if (info2->bitpos >= this_end)
2812 			    break;
2813 			  if (info2->order < try_order)
2814 			    {
2815 			      if (info2->rhs_code != INTEGER_CST)
2816 				{
2817 				  /* Normally check_no_overlap makes sure this
2818 				     doesn't happen, but if end grows below,
2819 				     then we need to process more stores than
2820 				     check_no_overlap verified.  Example:
2821 				      MEM[(int *)p_5] = 0;
2822 				      MEM[(short *)p_5 + 3B] = 1;
2823 				      MEM[(char *)p_5 + 4B] = _9;
2824 				      MEM[(char *)p_5 + 2B] = 2;  */
2825 				  k = 0;
2826 				  break;
2827 				}
2828 			      k = j;
2829 			      min_order = MIN (min_order, info2->order);
2830 			      this_end = MAX (this_end,
2831 					      info2->bitpos + info2->bitsize);
2832 			    }
2833 			  else if (info2->rhs_code == INTEGER_CST
2834 				   && !last_iter)
2835 			    {
2836 			      max_order = MAX (max_order, info2->order + 1);
2837 			      first_nonmergeable_int_order
2838 				= MIN (first_nonmergeable_int_order,
2839 				       info2->order);
2840 			    }
2841 			  else
2842 			    first_nonmergeable_order
2843 			      = MIN (first_nonmergeable_order, info2->order);
2844 			}
2845 		      if (k > i
2846 			  && !check_no_overlap (m_store_info, len - 1, true,
2847 						min_order, try_order,
2848 						merged_store->start, this_end,
2849 						first_earlier, end_earlier))
2850 			k = 0;
2851 		      if (k == 0)
2852 			{
2853 			  if (last_order == try_order)
2854 			    break;
2855 			  /* If this failed, but only because we grew
2856 			     try_order, retry with the last working one,
2857 			     so that we merge at least something.  */
2858 			  try_order = last_order;
2859 			  last_iter = true;
2860 			  continue;
2861 			}
2862 		      last_order = try_order;
2863 		      /* Retry with a larger try_order to see if we could
2864 			 merge some further INTEGER_CST stores.  */
2865 		      if (max_order
2866 			  && (first_nonmergeable_int_order
2867 			      < first_nonmergeable_order))
2868 			{
2869 			  try_order = MIN (max_order,
2870 					   first_nonmergeable_order);
2871 			  try_order
2872 			    = MIN (try_order,
2873 				   merged_store->first_nonmergeable_order);
2874 			  if (try_order > last_order && ++attempts < 16)
2875 			    continue;
2876 			}
2877 		      first_nonmergeable_order
2878 			= MIN (first_nonmergeable_order,
2879 			       first_nonmergeable_int_order);
2880 		      end = this_end;
2881 		      break;
2882 		    }
2883 		  while (1);
2884 
2885 		  if (k != 0)
2886 		    {
2887 		      merged_store->merge_overlapping (info);
2888 
2889 		      merged_store->first_nonmergeable_order
2890 			= MIN (merged_store->first_nonmergeable_order,
2891 			       first_nonmergeable_order);
2892 
2893 		      for (unsigned int j = i + 1; j <= k; j++)
2894 			{
2895 			  store_immediate_info *info2 = m_store_info[j];
2896 			  gcc_assert (info2->bitpos < end);
2897 			  if (info2->order < last_order)
2898 			    {
2899 			      gcc_assert (info2->rhs_code == INTEGER_CST);
2900 			      if (info != info2)
2901 				merged_store->merge_overlapping (info2);
2902 			    }
2903 			  /* Other stores are kept and not merged in any
2904 			     way.  */
2905 			}
2906 		      ignore = k;
2907 		      goto done;
2908 		    }
2909 		}
2910 	    }
2911 	}
2912       /* |---store 1---||---store 2---|
2913 	 This store is consecutive to the previous one.
2914 	 Merge it into the current store group.  There can be gaps in between
2915 	 the stores, but there can't be gaps in between bitregions.  */
2916       else if (info->bitregion_start <= merged_store->bitregion_end
2917 	       && merged_store->can_be_merged_into (info))
2918 	{
2919 	  store_immediate_info *infof = merged_store->stores[0];
2920 
2921 	  /* All the rhs_code ops that take 2 operands are commutative,
2922 	     swap the operands if it could make the operands compatible.  */
2923 	  if (infof->ops[0].base_addr
2924 	      && infof->ops[1].base_addr
2925 	      && info->ops[0].base_addr
2926 	      && info->ops[1].base_addr
2927 	      && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2928 			   info->bitpos - infof->bitpos)
2929 	      && operand_equal_p (info->ops[1].base_addr,
2930 				  infof->ops[0].base_addr, 0))
2931 	    {
2932 	      std::swap (info->ops[0], info->ops[1]);
2933 	      info->ops_swapped_p = true;
2934 	    }
2935 	  if (check_no_overlap (m_store_info, i, false,
2936 				MIN (merged_store->first_order, info->order),
2937 				MAX (merged_store->last_order, info->order),
2938 				merged_store->start,
2939 				MAX (merged_store->start + merged_store->width,
2940 				     info->bitpos + info->bitsize),
2941 				first_earlier, end_earlier))
2942 	    {
2943 	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
2944 	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2945 		{
2946 		  info->rhs_code = BIT_INSERT_EXPR;
2947 		  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2948 		  info->ops[0].base_addr = NULL_TREE;
2949 		}
2950 	      else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2951 		{
2952 		  store_immediate_info *infoj;
2953 		  unsigned int j;
2954 		  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2955 		    {
2956 		      infoj->rhs_code = BIT_INSERT_EXPR;
2957 		      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2958 		      infoj->ops[0].base_addr = NULL_TREE;
2959 		    }
2960 		}
2961 	      if ((infof->ops[0].base_addr
2962 		   ? compatible_load_p (merged_store, info, base_addr, 0)
2963 		   : !info->ops[0].base_addr)
2964 		  && (infof->ops[1].base_addr
2965 		      ? compatible_load_p (merged_store, info, base_addr, 1)
2966 		      : !info->ops[1].base_addr))
2967 		{
2968 		  merged_store->merge_into (info);
2969 		  goto done;
2970 		}
2971 	    }
2972 	}
2973 
2974       /* |---store 1---| <gap> |---store 2---|.
2975 	 Gap between stores or the rhs not compatible.  Start a new group.  */
2976 
2977       /* Try to apply all the stores recorded for the group to determine
2978 	 the bitpattern they write and discard it if that fails.
2979 	 This will also reject single-store groups.  */
2980       if (merged_store->apply_stores ())
2981 	m_merged_store_groups.safe_push (merged_store);
2982       else
2983 	delete merged_store;
2984 
2985       merged_store = new merged_store_group (info);
2986       end_earlier = i;
2987       if (dump_file && (dump_flags & TDF_DETAILS))
2988 	fputs ("New store group\n", dump_file);
2989 
2990     done:
2991       if (dump_file && (dump_flags & TDF_DETAILS))
2992 	{
2993 	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2994 			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
2995 		   i, info->bitsize, info->bitpos);
2996 	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
2997 	  fputc ('\n', dump_file);
2998 	}
2999     }
3000 
3001   /* Record or discard the last store group.  */
3002   if (merged_store)
3003     {
3004       if (merged_store->apply_stores ())
3005 	m_merged_store_groups.safe_push (merged_store);
3006       else
3007 	delete merged_store;
3008     }
3009 
3010   gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3011 
3012   bool success
3013     = !m_merged_store_groups.is_empty ()
3014       && m_merged_store_groups.length () < m_store_info.length ();
3015 
3016   if (success && dump_file)
3017     fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3018 	     m_merged_store_groups.length ());
3019 
3020   return success;
3021 }
3022 
3023 /* Return the type to use for the merged stores or loads described by STMTS.
3024    This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
3025    otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3026    of the MEM_REFs if any.  */
3027 
3028 static tree
3029 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3030 			  unsigned short *cliquep, unsigned short *basep)
3031 {
3032   gimple *stmt;
3033   unsigned int i;
3034   tree type = NULL_TREE;
3035   tree ret = NULL_TREE;
3036   *cliquep = 0;
3037   *basep = 0;
3038 
3039   FOR_EACH_VEC_ELT (stmts, i, stmt)
3040     {
3041       tree ref = is_load ? gimple_assign_rhs1 (stmt)
3042 			 : gimple_assign_lhs (stmt);
3043       tree type1 = reference_alias_ptr_type (ref);
3044       tree base = get_base_address (ref);
3045 
3046       if (i == 0)
3047 	{
3048 	  if (TREE_CODE (base) == MEM_REF)
3049 	    {
3050 	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
3051 	      *basep = MR_DEPENDENCE_BASE (base);
3052 	    }
3053 	  ret = type = type1;
3054 	  continue;
3055 	}
3056       if (!alias_ptr_types_compatible_p (type, type1))
3057 	ret = ptr_type_node;
3058       if (TREE_CODE (base) != MEM_REF
3059 	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3060 	  || *basep != MR_DEPENDENCE_BASE (base))
3061 	{
3062 	  *cliquep = 0;
3063 	  *basep = 0;
3064 	}
3065     }
3066   return ret;
3067 }
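
/* For illustration: if one statement in STMTS accesses memory through an
   int * reference and another through a float * reference, the alias
   pointer types are not compatible, so the function falls back to
   ptr_type_node and the merged access ends up with the conservative
   "may alias anything" alias set; the dependence clique/base info is
   likewise only kept when it is identical across all statements.  */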
3068 
3069 /* Return the location_t information we can find among the statements
3070    in STMTS.  */
3071 
3072 static location_t
3073 get_location_for_stmts (vec<gimple *> &stmts)
3074 {
3075   gimple *stmt;
3076   unsigned int i;
3077 
3078   FOR_EACH_VEC_ELT (stmts, i, stmt)
3079     if (gimple_has_location (stmt))
3080       return gimple_location (stmt);
3081 
3082   return UNKNOWN_LOCATION;
3083 }
3084 
3085 /* Used to describe a store resulting from splitting a wide store into
3086    smaller regularly-sized stores in split_group.  */
3087 
3088 struct split_store
3089 {
3090   unsigned HOST_WIDE_INT bytepos;
3091   unsigned HOST_WIDE_INT size;
3092   unsigned HOST_WIDE_INT align;
3093   auto_vec<store_immediate_info *> orig_stores;
3094   /* True if there is a single orig stmt covering the whole split store.  */
3095   bool orig;
3096   split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3097 	       unsigned HOST_WIDE_INT);
3098 };
3099 
3100 /* Simple constructor.  */
3101 
3102 split_store::split_store (unsigned HOST_WIDE_INT bp,
3103 			  unsigned HOST_WIDE_INT sz,
3104 			  unsigned HOST_WIDE_INT al)
3105 			  : bytepos (bp), size (sz), align (al), orig (false)
3106 {
3107   orig_stores.create (0);
3108 }
3109 
3110 /* Record all stores in GROUP that write to the region starting at BITPOS
3111    and of size BITSIZE.  Record infos for such statements in STORES if
3112    non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
3113    if there is exactly one original store in the range.  */
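/* Worked example with invented offsets: if GROUP contains stores covering
   the bit ranges [0, 8), [8, 16) and [32, 48), then a query with BITPOS 0
   and BITSIZE 16 pushes the first two infos into STORES (when non-NULL)
   and returns NULL because more than one store overlaps the range, while
   a query with BITPOS 32 and BITSIZE 16 returns the third info, the only
   store overlapping that range.  */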
3114 
3115 static store_immediate_info *
3116 find_constituent_stores (struct merged_store_group *group,
3117 			 vec<store_immediate_info *> *stores,
3118 			 unsigned int *first,
3119 			 unsigned HOST_WIDE_INT bitpos,
3120 			 unsigned HOST_WIDE_INT bitsize)
3121 {
3122   store_immediate_info *info, *ret = NULL;
3123   unsigned int i;
3124   bool second = false;
3125   bool update_first = true;
3126   unsigned HOST_WIDE_INT end = bitpos + bitsize;
3127   for (i = *first; group->stores.iterate (i, &info); ++i)
3128     {
3129       unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3130       unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3131       if (stmt_end <= bitpos)
3132 	{
3133 	  /* BITPOS passed to this function never decreases from within the
3134 	     same split_group call, so optimize and don't scan info records
3135 	     which are known to end before or at BITPOS next time.
3136 	     Only do it if all stores before this one also pass this.  */
3137 	  if (update_first)
3138 	    *first = i + 1;
3139 	  continue;
3140 	}
3141       else
3142 	update_first = false;
3143 
3144       /* The stores in GROUP are ordered by bitposition so if we're past
3145 	 the region for this group return early.  */
3146       if (stmt_start >= end)
3147 	return ret;
3148 
3149       if (stores)
3150 	{
3151 	  stores->safe_push (info);
3152 	  if (ret)
3153 	    {
3154 	      ret = NULL;
3155 	      second = true;
3156 	    }
3157 	}
3158       else if (ret)
3159 	return NULL;
3160       if (!second)
3161 	ret = info;
3162     }
3163   return ret;
3164 }
3165 
3166 /* Return how many SSA_NAMEs used to compute value to store in the INFO
3167    store have multiple uses.  If any SSA_NAME has multiple uses, also
3168    count statements needed to compute it.  */
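/* For instance (SSA names invented), for a store recorded with rhs_code
   MEM_REF:

     _1 = *q_5;
     *p_7 = _1;

   the result is 0 when the store is the only use of _1, and
   1 + ops[0].bit_not_p when _1 has other uses as well, since the original
   load (and a possible BIT_NOT_EXPR) then has to stay live in addition to
   the new merged sequence.  */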
3169 
3170 static unsigned
3171 count_multiple_uses (store_immediate_info *info)
3172 {
3173   gimple *stmt = info->stmt;
3174   unsigned ret = 0;
3175   switch (info->rhs_code)
3176     {
3177     case INTEGER_CST:
3178       return 0;
3179     case BIT_AND_EXPR:
3180     case BIT_IOR_EXPR:
3181     case BIT_XOR_EXPR:
3182       if (info->bit_not_p)
3183 	{
3184 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3185 	    ret = 1; /* Fall through below to count
3186 			the BIT_NOT_EXPR stmt and then
3187 			the BIT_{AND,IOR,XOR}_EXPR and anything
3188 			it uses.  */
3189 	  else
3190 	    /* After this, stmt is the BIT_NOT_EXPR.  */
3191 	    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3192 	}
3193       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3194 	{
3195 	  ret += 1 + info->ops[0].bit_not_p;
3196 	  if (info->ops[1].base_addr)
3197 	    ret += 1 + info->ops[1].bit_not_p;
3198 	  return ret + 1;
3199 	}
3200       stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3201       /* stmt is now the BIT_*_EXPR.  */
3202       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3203 	ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3204       else if (info->ops[info->ops_swapped_p].bit_not_p)
3205 	{
3206 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3207 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3208 	    ++ret;
3209 	}
3210       if (info->ops[1].base_addr == NULL_TREE)
3211 	{
3212 	  gcc_checking_assert (!info->ops_swapped_p);
3213 	  return ret;
3214 	}
3215       if (!has_single_use (gimple_assign_rhs2 (stmt)))
3216 	ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3217       else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3218 	{
3219 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3220 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3221 	    ++ret;
3222 	}
3223       return ret;
3224     case MEM_REF:
3225       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3226 	return 1 + info->ops[0].bit_not_p;
3227       else if (info->ops[0].bit_not_p)
3228 	{
3229 	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3230 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3231 	    return 1;
3232 	}
3233       return 0;
3234     case BIT_INSERT_EXPR:
3235       return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3236     default:
3237       gcc_unreachable ();
3238     }
3239 }
3240 
3241 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3242    vector (if non-NULL) with split_store structs describing the byte offset
3243    (from the base), the bit size and alignment of each store as well as the
3244    original statements involved in each such split group.
3245    This is to separate the splitting strategy from the statement
3246    building/emission/linking done in output_merged_store.
3247    Return number of new stores.
3248    If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3249    If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3250    If SPLIT_STORES is NULL, it is just a dry run to count number of
3251    new stores.  */
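/* A rough example under assumed conditions (MAX_STORE_BITSIZE of 64, a
   4-byte aligned destination, no padding bytes, ALLOW_UNALIGNED_STORE
   false): a merged group covering 6 bytes made up of several narrower
   stores gets split into a 4-byte store at byte offset 0 followed by a
   2-byte store at byte offset 4, and the function returns 2.  The exact
   split depends on the group alignment, on fully-masked (padding) bytes
   and on the alignment of any constituent loads.  */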
3252 
3253 static unsigned int
3254 split_group (merged_store_group *group, bool allow_unaligned_store,
3255 	     bool allow_unaligned_load,
3256 	     vec<struct split_store *> *split_stores,
3257 	     unsigned *total_orig,
3258 	     unsigned *total_new)
3259 {
3260   unsigned HOST_WIDE_INT pos = group->bitregion_start;
3261   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3262   unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3263   unsigned HOST_WIDE_INT group_align = group->align;
3264   unsigned HOST_WIDE_INT align_base = group->align_base;
3265   unsigned HOST_WIDE_INT group_load_align = group_align;
3266   bool any_orig = false;
3267 
3268   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3269 
3270   if (group->stores[0]->rhs_code == LROTATE_EXPR
3271       || group->stores[0]->rhs_code == NOP_EXPR)
3272     {
3273       /* For the bswap framework using sets of stores, all the checking
3274 	 has been done earlier in try_coalesce_bswap and the result needs
3275 	 to be emitted as a single store.  */
3276       if (total_orig)
3277 	{
3278 	  /* Avoid the old/new stmt count heuristics.  This transformation
3279 	     should always be beneficial.  */
3280 	  total_new[0] = 1;
3281 	  total_orig[0] = 2;
3282 	}
3283 
3284       if (split_stores)
3285 	{
3286 	  unsigned HOST_WIDE_INT align_bitpos
3287 	    = (group->start - align_base) & (group_align - 1);
3288 	  unsigned HOST_WIDE_INT align = group_align;
3289 	  if (align_bitpos)
3290 	    align = least_bit_hwi (align_bitpos);
3291 	  bytepos = group->start / BITS_PER_UNIT;
3292 	  struct split_store *store
3293 	    = new split_store (bytepos, group->width, align);
3294 	  unsigned int first = 0;
3295 	  find_constituent_stores (group, &store->orig_stores,
3296 				   &first, group->start, group->width);
3297 	  split_stores->safe_push (store);
3298 	}
3299 
3300       return 1;
3301     }
3302 
3303   unsigned int ret = 0, first = 0;
3304   unsigned HOST_WIDE_INT try_pos = bytepos;
3305 
3306   if (total_orig)
3307     {
3308       unsigned int i;
3309       store_immediate_info *info = group->stores[0];
3310 
3311       total_new[0] = 0;
3312       total_orig[0] = 1; /* The orig store.  */
3313       info = group->stores[0];
3314       if (info->ops[0].base_addr)
3315 	total_orig[0]++;
3316       if (info->ops[1].base_addr)
3317 	total_orig[0]++;
3318       switch (info->rhs_code)
3319 	{
3320 	case BIT_AND_EXPR:
3321 	case BIT_IOR_EXPR:
3322 	case BIT_XOR_EXPR:
3323 	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
3324 	  break;
3325 	default:
3326 	  break;
3327 	}
3328       total_orig[0] *= group->stores.length ();
3329 
3330       FOR_EACH_VEC_ELT (group->stores, i, info)
3331 	{
3332 	  total_new[0] += count_multiple_uses (info);
3333 	  total_orig[0] += (info->bit_not_p
3334 			    + info->ops[0].bit_not_p
3335 			    + info->ops[1].bit_not_p);
3336 	}
3337     }
3338 
3339   if (!allow_unaligned_load)
3340     for (int i = 0; i < 2; ++i)
3341       if (group->load_align[i])
3342 	group_load_align = MIN (group_load_align, group->load_align[i]);
3343 
3344   while (size > 0)
3345     {
3346       if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3347 	  && group->mask[try_pos - bytepos] == (unsigned char) ~0U)
3348 	{
3349 	  /* Skip padding bytes.  */
3350 	  ++try_pos;
3351 	  size -= BITS_PER_UNIT;
3352 	  continue;
3353 	}
3354 
3355       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3356       unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3357       unsigned HOST_WIDE_INT align_bitpos
3358 	= (try_bitpos - align_base) & (group_align - 1);
3359       unsigned HOST_WIDE_INT align = group_align;
3360       if (align_bitpos)
3361 	align = least_bit_hwi (align_bitpos);
3362       if (!allow_unaligned_store)
3363 	try_size = MIN (try_size, align);
3364       if (!allow_unaligned_load)
3365 	{
3366 	  /* If we can't do or don't want to do unaligned stores
3367 	     as well as loads, we need to take the loads into account
3368 	     as well.  */
3369 	  unsigned HOST_WIDE_INT load_align = group_load_align;
3370 	  align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3371 	  if (align_bitpos)
3372 	    load_align = least_bit_hwi (align_bitpos);
3373 	  for (int i = 0; i < 2; ++i)
3374 	    if (group->load_align[i])
3375 	      {
3376 		align_bitpos
3377 		  = known_alignment (try_bitpos
3378 				     - group->stores[0]->bitpos
3379 				     + group->stores[0]->ops[i].bitpos
3380 				     - group->load_align_base[i]);
3381 		if (align_bitpos & (group_load_align - 1))
3382 		  {
3383 		    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3384 		    load_align = MIN (load_align, a);
3385 		  }
3386 	      }
3387 	  try_size = MIN (try_size, load_align);
3388 	}
3389       store_immediate_info *info
3390 	= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3391       if (info)
3392 	{
3393 	  /* If there is just one original statement for the range, see if
3394 	     we can just reuse the original store which could be even larger
3395 	     than try_size.  */
3396 	  unsigned HOST_WIDE_INT stmt_end
3397 	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3398 	  info = find_constituent_stores (group, NULL, &first, try_bitpos,
3399 					  stmt_end - try_bitpos);
3400 	  if (info && info->bitpos >= try_bitpos)
3401 	    {
3402 	      try_size = stmt_end - try_bitpos;
3403 	      goto found;
3404 	    }
3405 	}
3406 
3407       /* Approximate store bitsize for the case when there are no padding
3408 	 bits.  */
3409       while (try_size > size)
3410 	try_size /= 2;
3411       /* Now look for whole padding bytes at the end of that bitsize.  */
3412       for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3413 	if (group->mask[try_pos - bytepos + nonmasked - 1]
3414 	    != (unsigned char) ~0U)
3415 	  break;
3416       if (nonmasked == 0)
3417 	{
3418 	  /* If entire try_size range is padding, skip it.  */
3419 	  try_pos += try_size / BITS_PER_UNIT;
3420 	  size -= try_size;
3421 	  continue;
3422 	}
3423       /* Otherwise try to decrease try_size if the second half, the last
3424 	 three quarters etc. are padding.  */
3425       nonmasked *= BITS_PER_UNIT;
3426       while (nonmasked <= try_size / 2)
3427 	try_size /= 2;
3428       if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3429 	{
3430 	  /* Now look for whole padding bytes at the start of that bitsize.  */
3431 	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3432 	  for (masked = 0; masked < try_bytesize; ++masked)
3433 	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U)
3434 	      break;
3435 	  masked *= BITS_PER_UNIT;
3436 	  gcc_assert (masked < try_size);
3437 	  if (masked >= try_size / 2)
3438 	    {
3439 	      while (masked >= try_size / 2)
3440 		{
3441 		  try_size /= 2;
3442 		  try_pos += try_size / BITS_PER_UNIT;
3443 		  size -= try_size;
3444 		  masked -= try_size;
3445 		}
3446 	      /* Need to recompute the alignment, so just retry at the new
3447 		 position.  */
3448 	      continue;
3449 	    }
3450 	}
3451 
3452     found:
3453       ++ret;
3454 
3455       if (split_stores)
3456 	{
3457 	  struct split_store *store
3458 	    = new split_store (try_pos, try_size, align);
3459 	  info = find_constituent_stores (group, &store->orig_stores,
3460 					  &first, try_bitpos, try_size);
3461 	  if (info
3462 	      && info->bitpos >= try_bitpos
3463 	      && info->bitpos + info->bitsize <= try_bitpos + try_size)
3464 	    {
3465 	      store->orig = true;
3466 	      any_orig = true;
3467 	    }
3468 	  split_stores->safe_push (store);
3469 	}
3470 
3471       try_pos += try_size / BITS_PER_UNIT;
3472       size -= try_size;
3473     }
3474 
3475   if (total_orig)
3476     {
3477       unsigned int i;
3478       struct split_store *store;
3479       /* If we are reusing some original stores and any of the
3480 	 original SSA_NAMEs had multiple uses, we need to subtract
3481 	 those now before we add the new ones.  */
3482       if (total_new[0] && any_orig)
3483 	{
3484 	  FOR_EACH_VEC_ELT (*split_stores, i, store)
3485 	    if (store->orig)
3486 	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3487 	}
3488       total_new[0] += ret; /* The new store.  */
3489       store_immediate_info *info = group->stores[0];
3490       if (info->ops[0].base_addr)
3491 	total_new[0] += ret;
3492       if (info->ops[1].base_addr)
3493 	total_new[0] += ret;
3494       switch (info->rhs_code)
3495 	{
3496 	case BIT_AND_EXPR:
3497 	case BIT_IOR_EXPR:
3498 	case BIT_XOR_EXPR:
3499 	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
3500 	  break;
3501 	default:
3502 	  break;
3503 	}
3504       FOR_EACH_VEC_ELT (*split_stores, i, store)
3505 	{
3506 	  unsigned int j;
3507 	  bool bit_not_p[3] = { false, false, false };
3508 	  /* If all orig_stores have certain bit_not_p set, then
3509 	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
3510 	     If some orig_stores have certain bit_not_p set, then
3511 	     we'd use a BIT_XOR_EXPR with a mask and need to account for
3512 	     it.  */
3513 	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3514 	    {
3515 	      if (info->ops[0].bit_not_p)
3516 		bit_not_p[0] = true;
3517 	      if (info->ops[1].bit_not_p)
3518 		bit_not_p[1] = true;
3519 	      if (info->bit_not_p)
3520 		bit_not_p[2] = true;
3521 	    }
3522 	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3523 	}
3524 
3525     }
3526 
3527   return ret;
3528 }
3529 
3530 /* Return the operation through which the operand IDX (if < 2) or
3531    result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
3532    is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3533    the bits should be xored with mask.  */
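/* Sketch of the three outcomes (layout invented): if no constituent store
   of SPLIT_STORE has its value inverted, NOP_EXPR is returned and no extra
   statement is needed.  If every constituent store is inverted and none of
   them is a bit-field narrower than its bitsize, BIT_NOT_EXPR is returned.
   Otherwise, e.g. when only the low two bytes of a 4-byte split store come
   from inverted values, BIT_XOR_EXPR is returned together with a MASK such
   as 0xffff on a little-endian target, so that only those bytes are
   flipped.  */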
3534 
3535 static enum tree_code
3536 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3537 {
3538   unsigned int i;
3539   store_immediate_info *info;
3540   unsigned int cnt = 0;
3541   bool any_paddings = false;
3542   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3543     {
3544       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3545       if (bit_not_p)
3546 	{
3547 	  ++cnt;
3548 	  tree lhs = gimple_assign_lhs (info->stmt);
3549 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3550 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3551 	    any_paddings = true;
3552 	}
3553     }
3554   mask = NULL_TREE;
3555   if (cnt == 0)
3556     return NOP_EXPR;
3557   if (cnt == split_store->orig_stores.length () && !any_paddings)
3558     return BIT_NOT_EXPR;
3559 
3560   unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3561   unsigned buf_size = split_store->size / BITS_PER_UNIT;
3562   unsigned char *buf
3563     = XALLOCAVEC (unsigned char, buf_size);
3564   memset (buf, ~0U, buf_size);
3565   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3566     {
3567       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3568       if (!bit_not_p)
3569 	continue;
3570       /* Clear regions with bit_not_p and invert afterwards, rather than
3571 	 clear regions with !bit_not_p, so that gaps in between stores aren't
3572 	 set in the mask.  */
3573       unsigned HOST_WIDE_INT bitsize = info->bitsize;
3574       unsigned HOST_WIDE_INT prec = bitsize;
3575       unsigned int pos_in_buffer = 0;
3576       if (any_paddings)
3577 	{
3578 	  tree lhs = gimple_assign_lhs (info->stmt);
3579 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3580 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3581 	    prec = TYPE_PRECISION (TREE_TYPE (lhs));
3582 	}
3583       if (info->bitpos < try_bitpos)
3584 	{
3585 	  gcc_assert (info->bitpos + bitsize > try_bitpos);
3586 	  if (!BYTES_BIG_ENDIAN)
3587 	    {
3588 	      if (prec <= try_bitpos - info->bitpos)
3589 		continue;
3590 	      prec -= try_bitpos - info->bitpos;
3591 	    }
3592 	  bitsize -= try_bitpos - info->bitpos;
3593 	  if (BYTES_BIG_ENDIAN && prec > bitsize)
3594 	    prec = bitsize;
3595 	}
3596       else
3597 	pos_in_buffer = info->bitpos - try_bitpos;
3598       if (prec < bitsize)
3599 	{
3600 	  /* If this is a bool inversion, invert just the least significant
3601 	     prec bits rather than all bits of it.  */
3602 	  if (BYTES_BIG_ENDIAN)
3603 	    {
3604 	      pos_in_buffer += bitsize - prec;
3605 	      if (pos_in_buffer >= split_store->size)
3606 		continue;
3607 	    }
3608 	  bitsize = prec;
3609 	}
3610       if (pos_in_buffer + bitsize > split_store->size)
3611 	bitsize = split_store->size - pos_in_buffer;
3612       unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3613       if (BYTES_BIG_ENDIAN)
3614 	clear_bit_region_be (p, (BITS_PER_UNIT - 1
3615 				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3616       else
3617 	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3618     }
3619   for (unsigned int i = 0; i < buf_size; ++i)
3620     buf[i] = ~buf[i];
3621   mask = native_interpret_expr (int_type, buf, buf_size);
3622   return BIT_XOR_EXPR;
3623 }
3624 
3625 /* Given a merged store group GROUP output the widened version of it.
3626    The store chain is against the base object BASE.
3627    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3628    unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3629    Make sure that the number of statements output is less than the number of
3630    original statements.  If a better sequence is possible emit it and
3631    return true.  */
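/* As an illustration of the profitability checks below (numbers invented):
   if the group consists of four original single-byte stores and
   split_group finds that one 4-byte store suffices, split_stores has
   length 1, below orig_num_stmts (4), and the estimated new statement
   count is smaller than the original one, so the new sequence is emitted.
   If the split still needs four stores, or the new estimate is not
   smaller, the function frees the split_store objects and bails out.  */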
3632 
3633 bool
3634 imm_store_chain_info::output_merged_store (merged_store_group *group)
3635 {
3636   split_store *split_store;
3637   unsigned int i;
3638   unsigned HOST_WIDE_INT start_byte_pos
3639     = group->bitregion_start / BITS_PER_UNIT;
3640 
3641   unsigned int orig_num_stmts = group->stores.length ();
3642   if (orig_num_stmts < 2)
3643     return false;
3644 
3645   auto_vec<struct split_store *, 32> split_stores;
3646   bool allow_unaligned_store
3647     = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
3648   bool allow_unaligned_load = allow_unaligned_store;
3649   if (allow_unaligned_store)
3650     {
3651       /* If unaligned stores are allowed, see how many stores we'd emit
3652 	 for unaligned and how many stores we'd emit for aligned stores.
3653 	 Only use unaligned stores if it allows fewer stores than aligned.  */
3654       unsigned aligned_cnt
3655 	= split_group (group, false, allow_unaligned_load, NULL, NULL, NULL);
3656       unsigned unaligned_cnt
3657 	= split_group (group, true, allow_unaligned_load, NULL, NULL, NULL);
3658       if (aligned_cnt <= unaligned_cnt)
3659 	allow_unaligned_store = false;
3660     }
3661   unsigned total_orig, total_new;
3662   split_group (group, allow_unaligned_store, allow_unaligned_load,
3663 	       &split_stores, &total_orig, &total_new);
3664 
3665   if (split_stores.length () >= orig_num_stmts)
3666     {
3667       /* We didn't manage to reduce the number of statements.  Bail out.  */
3668       if (dump_file && (dump_flags & TDF_DETAILS))
3669 	fprintf (dump_file, "Exceeded original number of stmts (%u)."
3670 			    "  Not profitable to emit new sequence.\n",
3671 		 orig_num_stmts);
3672       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3673 	delete split_store;
3674       return false;
3675     }
3676   if (total_orig <= total_new)
3677     {
3678       /* If number of estimated new statements is above estimated original
3679 	 statements, bail out too.  */
3680       if (dump_file && (dump_flags & TDF_DETAILS))
3681 	fprintf (dump_file, "Estimated number of original stmts (%u)"
3682 			    " not larger than estimated number of new"
3683 			    " stmts (%u).\n",
3684 		 total_orig, total_new);
3685       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3686 	delete split_store;
3687       return false;
3688     }
3689 
3690   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3691   gimple_seq seq = NULL;
3692   tree last_vdef, new_vuse;
3693   last_vdef = gimple_vdef (group->last_stmt);
3694   new_vuse = gimple_vuse (group->last_stmt);
3695   tree bswap_res = NULL_TREE;
3696 
3697   if (group->stores[0]->rhs_code == LROTATE_EXPR
3698       || group->stores[0]->rhs_code == NOP_EXPR)
3699     {
3700       tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3701       gimple *ins_stmt = group->stores[0]->ins_stmt;
3702       struct symbolic_number *n = &group->stores[0]->n;
3703       bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3704 
3705       switch (n->range)
3706 	{
3707 	case 16:
3708 	  load_type = bswap_type = uint16_type_node;
3709 	  break;
3710 	case 32:
3711 	  load_type = uint32_type_node;
3712 	  if (bswap)
3713 	    {
3714 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3715 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3716 	    }
3717 	  break;
3718 	case 64:
3719 	  load_type = uint64_type_node;
3720 	  if (bswap)
3721 	    {
3722 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3723 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3724 	    }
3725 	  break;
3726 	default:
3727 	  gcc_unreachable ();
3728 	}
3729 
3730       /* If each load has the vuse of the corresponding store,
3731 	 we've checked the aliasing already in try_coalesce_bswap and
3732 	 we want to sink the needed load into seq.  So we need to use
3733 	 new_vuse on the load.  */
3734       if (n->base_addr)
3735 	{
3736 	  if (n->vuse == NULL)
3737 	    {
3738 	      n->vuse = new_vuse;
3739 	      ins_stmt = NULL;
3740 	    }
3741 	  else
3742 	    /* Update vuse in case it has changed by output_merged_stores.  */
3743 	    n->vuse = gimple_vuse (ins_stmt);
3744 	}
3745       bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3746 				 bswap_type, load_type, n, bswap);
3747       gcc_assert (bswap_res);
3748     }
3749 
3750   gimple *stmt = NULL;
3751   auto_vec<gimple *, 32> orig_stmts;
3752   gimple_seq this_seq;
3753   tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3754 				      is_gimple_mem_ref_addr, NULL_TREE);
3755   gimple_seq_add_seq_without_update (&seq, this_seq);
3756 
3757   tree load_addr[2] = { NULL_TREE, NULL_TREE };
3758   gimple_seq load_seq[2] = { NULL, NULL };
3759   gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3760   for (int j = 0; j < 2; ++j)
3761     {
3762       store_operand_info &op = group->stores[0]->ops[j];
3763       if (op.base_addr == NULL_TREE)
3764 	continue;
3765 
3766       store_immediate_info *infol = group->stores.last ();
3767       if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3768 	{
3769 	  /* We can't pick the location randomly; while we've verified
3770 	     all the loads have the same vuse, they can still be in different
3771 	     basic blocks and we need to pick the one from the last bb:
3772 	     int x = q[0];
3773 	     if (x == N) return;
3774 	     int y = q[1];
3775 	     p[0] = x;
3776 	     p[1] = y;
3777 	     otherwise if we put the wider load at the q[0] load, we might
3778 	     segfault if q[1] is not mapped.  */
3779 	  basic_block bb = gimple_bb (op.stmt);
3780 	  gimple *ostmt = op.stmt;
3781 	  store_immediate_info *info;
3782 	  FOR_EACH_VEC_ELT (group->stores, i, info)
3783 	    {
3784 	      gimple *tstmt = info->ops[j].stmt;
3785 	      basic_block tbb = gimple_bb (tstmt);
3786 	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3787 		{
3788 		  ostmt = tstmt;
3789 		  bb = tbb;
3790 		}
3791 	    }
3792 	  load_gsi[j] = gsi_for_stmt (ostmt);
3793 	  load_addr[j]
3794 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3795 				      &load_seq[j], is_gimple_mem_ref_addr,
3796 				      NULL_TREE);
3797 	}
3798       else if (operand_equal_p (base_addr, op.base_addr, 0))
3799 	load_addr[j] = addr;
3800       else
3801 	{
3802 	  load_addr[j]
3803 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3804 				      &this_seq, is_gimple_mem_ref_addr,
3805 				      NULL_TREE);
3806 	  gimple_seq_add_seq_without_update (&seq, this_seq);
3807 	}
3808     }
3809 
3810   FOR_EACH_VEC_ELT (split_stores, i, split_store)
3811     {
3812       unsigned HOST_WIDE_INT try_size = split_store->size;
3813       unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3814       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3815       unsigned HOST_WIDE_INT align = split_store->align;
3816       tree dest, src;
3817       location_t loc;
3818       if (split_store->orig)
3819 	{
3820 	  /* If there is just a single constituent store which covers
3821 	     the whole area, just reuse the lhs and rhs.  */
3822 	  gimple *orig_stmt = split_store->orig_stores[0]->stmt;
3823 	  dest = gimple_assign_lhs (orig_stmt);
3824 	  src = gimple_assign_rhs1 (orig_stmt);
3825 	  loc = gimple_location (orig_stmt);
3826 	}
3827       else
3828 	{
3829 	  store_immediate_info *info;
3830 	  unsigned short clique, base;
3831 	  unsigned int k;
3832 	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3833 	    orig_stmts.safe_push (info->stmt);
3834 	  tree offset_type
3835 	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
3836 	  loc = get_location_for_stmts (orig_stmts);
3837 	  orig_stmts.truncate (0);
3838 
3839 	  tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
3840 	  int_type = build_aligned_type (int_type, align);
3841 	  dest = fold_build2 (MEM_REF, int_type, addr,
3842 			      build_int_cst (offset_type, try_pos));
3843 	  if (TREE_CODE (dest) == MEM_REF)
3844 	    {
3845 	      MR_DEPENDENCE_CLIQUE (dest) = clique;
3846 	      MR_DEPENDENCE_BASE (dest) = base;
3847 	    }
3848 
3849 	  tree mask;
3850 	  if (bswap_res)
3851 	    mask = integer_zero_node;
3852 	  else
3853 	    mask = native_interpret_expr (int_type,
3854 					  group->mask + try_pos
3855 					  - start_byte_pos,
3856 					  group->buf_size);
3857 
3858 	  tree ops[2];
3859 	  for (int j = 0;
3860 	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
3861 	       ++j)
3862 	    {
3863 	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
3864 	      if (bswap_res)
3865 		ops[j] = bswap_res;
3866 	      else if (op.base_addr)
3867 		{
3868 		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3869 		    orig_stmts.safe_push (info->ops[j].stmt);
3870 
3871 		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
3872 							  &clique, &base);
3873 		  location_t load_loc = get_location_for_stmts (orig_stmts);
3874 		  orig_stmts.truncate (0);
3875 
3876 		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
3877 		  unsigned HOST_WIDE_INT align_bitpos
3878 		    = known_alignment (try_bitpos
3879 				       - split_store->orig_stores[0]->bitpos
3880 				       + op.bitpos);
3881 		  if (align_bitpos & (load_align - 1))
3882 		    load_align = least_bit_hwi (align_bitpos);
3883 
3884 		  tree load_int_type
3885 		    = build_nonstandard_integer_type (try_size, UNSIGNED);
3886 		  load_int_type
3887 		    = build_aligned_type (load_int_type, load_align);
3888 
3889 		  poly_uint64 load_pos
3890 		    = exact_div (try_bitpos
3891 				 - split_store->orig_stores[0]->bitpos
3892 				 + op.bitpos,
3893 				 BITS_PER_UNIT);
3894 		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
3895 					build_int_cst (offset_type, load_pos));
3896 		  if (TREE_CODE (ops[j]) == MEM_REF)
3897 		    {
3898 		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
3899 		      MR_DEPENDENCE_BASE (ops[j]) = base;
3900 		    }
3901 		  if (!integer_zerop (mask))
3902 		    /* The load might load some bits (that will be masked off
3903 		       later on) uninitialized, avoid -W*uninitialized
3904 		       warnings in that case.  */
3905 		    TREE_NO_WARNING (ops[j]) = 1;
3906 
3907 		  stmt = gimple_build_assign (make_ssa_name (int_type),
3908 					      ops[j]);
3909 		  gimple_set_location (stmt, load_loc);
3910 		  if (gsi_bb (load_gsi[j]))
3911 		    {
3912 		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
3913 		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
3914 		    }
3915 		  else
3916 		    {
3917 		      gimple_set_vuse (stmt, new_vuse);
3918 		      gimple_seq_add_stmt_without_update (&seq, stmt);
3919 		    }
3920 		  ops[j] = gimple_assign_lhs (stmt);
3921 		  tree xor_mask;
3922 		  enum tree_code inv_op
3923 		    = invert_op (split_store, j, int_type, xor_mask);
3924 		  if (inv_op != NOP_EXPR)
3925 		    {
3926 		      stmt = gimple_build_assign (make_ssa_name (int_type),
3927 						  inv_op, ops[j], xor_mask);
3928 		      gimple_set_location (stmt, load_loc);
3929 		      ops[j] = gimple_assign_lhs (stmt);
3930 
3931 		      if (gsi_bb (load_gsi[j]))
3932 			gimple_seq_add_stmt_without_update (&load_seq[j],
3933 							    stmt);
3934 		      else
3935 			gimple_seq_add_stmt_without_update (&seq, stmt);
3936 		    }
3937 		}
3938 	      else
3939 		ops[j] = native_interpret_expr (int_type,
3940 						group->val + try_pos
3941 						- start_byte_pos,
3942 						group->buf_size);
3943 	    }
3944 
3945 	  switch (split_store->orig_stores[0]->rhs_code)
3946 	    {
3947 	    case BIT_AND_EXPR:
3948 	    case BIT_IOR_EXPR:
3949 	    case BIT_XOR_EXPR:
3950 	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3951 		{
3952 		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
3953 		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
3954 		}
3955 	      location_t bit_loc;
3956 	      bit_loc = get_location_for_stmts (orig_stmts);
3957 	      orig_stmts.truncate (0);
3958 
3959 	      stmt
3960 		= gimple_build_assign (make_ssa_name (int_type),
3961 				       split_store->orig_stores[0]->rhs_code,
3962 				       ops[0], ops[1]);
3963 	      gimple_set_location (stmt, bit_loc);
3964 	      /* If there is just one load and there is a separate
3965 		 load_seq[0], emit the bitwise op right after it.  */
3966 	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
3967 		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
3968 	      /* Otherwise, if at least one load is in seq, we need to
3969 		 emit the bitwise op right before the store.  If there
3970 		 are two loads and are emitted somewhere else, it would
3971 		 be better to emit the bitwise op as early as possible;
3972 		 we don't track where that would be possible right now
3973 		 though.  */
3974 	      else
3975 		gimple_seq_add_stmt_without_update (&seq, stmt);
3976 	      src = gimple_assign_lhs (stmt);
3977 	      tree xor_mask;
3978 	      enum tree_code inv_op;
3979 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
3980 	      if (inv_op != NOP_EXPR)
3981 		{
3982 		  stmt = gimple_build_assign (make_ssa_name (int_type),
3983 					      inv_op, src, xor_mask);
3984 		  gimple_set_location (stmt, bit_loc);
3985 		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
3986 		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
3987 		  else
3988 		    gimple_seq_add_stmt_without_update (&seq, stmt);
3989 		  src = gimple_assign_lhs (stmt);
3990 		}
3991 	      break;
3992 	    case LROTATE_EXPR:
3993 	    case NOP_EXPR:
3994 	      src = ops[0];
3995 	      if (!is_gimple_val (src))
3996 		{
3997 		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
3998 					      src);
3999 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4000 		  src = gimple_assign_lhs (stmt);
4001 		}
4002 	      if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4003 		{
4004 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4005 					      NOP_EXPR, src);
4006 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4007 		  src = gimple_assign_lhs (stmt);
4008 		}
4009 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
4010 	      if (inv_op != NOP_EXPR)
4011 		{
4012 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4013 					      inv_op, src, xor_mask);
4014 		  gimple_set_location (stmt, loc);
4015 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4016 		  src = gimple_assign_lhs (stmt);
4017 		}
4018 	      break;
4019 	    default:
4020 	      src = ops[0];
4021 	      break;
4022 	    }
4023 
4024 	  /* If bit insertion is required, we use the source as an accumulator
4025 	     into which the successive bit-field values are manually inserted.
4026 	     FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
4027 	  if (group->bit_insertion)
4028 	    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4029 	      if (info->rhs_code == BIT_INSERT_EXPR
4030 		  && info->bitpos < try_bitpos + try_size
4031 		  && info->bitpos + info->bitsize > try_bitpos)
4032 		{
4033 		  /* Mask, truncate, convert to final type, shift and ior into
4034 		     the accumulator.  Note that every step can be a no-op.  */
4035 		  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4036 		  const HOST_WIDE_INT end_gap
4037 		    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4038 		  tree tem = info->ops[0].val;
4039 		  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4040 		    {
4041 		      tree bitfield_type
4042 			= build_nonstandard_integer_type (info->bitsize,
4043 							  UNSIGNED);
4044 		      tem = gimple_convert (&seq, loc, bitfield_type, tem);
4045 		    }
4046 		  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4047 		    {
4048 		      const unsigned HOST_WIDE_INT imask
4049 			= (HOST_WIDE_INT_1U << info->bitsize) - 1;
4050 		      tem = gimple_build (&seq, loc,
4051 					  BIT_AND_EXPR, TREE_TYPE (tem), tem,
4052 					  build_int_cst (TREE_TYPE (tem),
4053 							 imask));
4054 		    }
4055 		  const HOST_WIDE_INT shift
4056 		    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4057 		  if (shift < 0)
4058 		    tem = gimple_build (&seq, loc,
4059 					RSHIFT_EXPR, TREE_TYPE (tem), tem,
4060 					build_int_cst (NULL_TREE, -shift));
4061 		  tem = gimple_convert (&seq, loc, int_type, tem);
4062 		  if (shift > 0)
4063 		    tem = gimple_build (&seq, loc,
4064 					LSHIFT_EXPR, int_type, tem,
4065 					build_int_cst (NULL_TREE, shift));
4066 		  src = gimple_build (&seq, loc,
4067 				      BIT_IOR_EXPR, int_type, tem, src);
4068 		}
4069 
4070 	  if (!integer_zerop (mask))
4071 	    {
4072 	      tree tem = make_ssa_name (int_type);
4073 	      tree load_src = unshare_expr (dest);
4074 	      /* The load might load some or all bits uninitialized,
4075 		 avoid -W*uninitialized warnings in that case.
4076 		 As an optimization, if all the bits are provably
4077 		 uninitialized (no stores at all yet, or the previous store
4078 		 was a CLOBBER), it would be nice to optimize away the load
4079 		 and replace it e.g. with 0.  */
4080 	      TREE_NO_WARNING (load_src) = 1;
4081 	      stmt = gimple_build_assign (tem, load_src);
4082 	      gimple_set_location (stmt, loc);
4083 	      gimple_set_vuse (stmt, new_vuse);
4084 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4085 
4086 	      /* FIXME: If there is a single chunk of zero bits in mask,
4087 		 perhaps use BIT_INSERT_EXPR instead?  */
4088 	      stmt = gimple_build_assign (make_ssa_name (int_type),
4089 					  BIT_AND_EXPR, tem, mask);
4090 	      gimple_set_location (stmt, loc);
4091 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4092 	      tem = gimple_assign_lhs (stmt);
4093 
4094 	      if (TREE_CODE (src) == INTEGER_CST)
4095 		src = wide_int_to_tree (int_type,
4096 					wi::bit_and_not (wi::to_wide (src),
4097 							 wi::to_wide (mask)));
4098 	      else
4099 		{
4100 		  tree nmask
4101 		    = wide_int_to_tree (int_type,
4102 					wi::bit_not (wi::to_wide (mask)));
4103 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4104 					      BIT_AND_EXPR, src, nmask);
4105 		  gimple_set_location (stmt, loc);
4106 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4107 		  src = gimple_assign_lhs (stmt);
4108 		}
4109 	      stmt = gimple_build_assign (make_ssa_name (int_type),
4110 					  BIT_IOR_EXPR, tem, src);
4111 	      gimple_set_location (stmt, loc);
4112 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4113 	      src = gimple_assign_lhs (stmt);
4114 	    }
4115 	}
4116 
4117       stmt = gimple_build_assign (dest, src);
4118       gimple_set_location (stmt, loc);
4119       gimple_set_vuse (stmt, new_vuse);
4120       gimple_seq_add_stmt_without_update (&seq, stmt);
4121 
4122       tree new_vdef;
4123       if (i < split_stores.length () - 1)
4124 	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4125       else
4126 	new_vdef = last_vdef;
4127 
4128       gimple_set_vdef (stmt, new_vdef);
4129       SSA_NAME_DEF_STMT (new_vdef) = stmt;
4130       new_vuse = new_vdef;
4131     }
4132 
4133   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4134     delete split_store;
4135 
4136   gcc_assert (seq);
4137   if (dump_file)
4138     {
4139       fprintf (dump_file,
4140 	       "New sequence of %u stores to replace old one of %u stores\n",
4141 	       split_stores.length (), orig_num_stmts);
4142       if (dump_flags & TDF_DETAILS)
4143 	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4144     }
4145   gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4146   for (int j = 0; j < 2; ++j)
4147     if (load_seq[j])
4148       gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4149 
4150   return true;
4151 }
4152 
4153 /* Process the merged_store_group objects created in the coalescing phase.
4154    The stores are all against the base object BASE.
4155    Try to output the widened stores and delete the original statements if
4156    successful.  Return true iff any changes were made.  */
4157 
4158 bool
4159 imm_store_chain_info::output_merged_stores ()
4160 {
4161   unsigned int i;
4162   merged_store_group *merged_store;
4163   bool ret = false;
4164   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4165     {
4166       if (output_merged_store (merged_store))
4167 	{
4168 	  unsigned int j;
4169 	  store_immediate_info *store;
4170 	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4171 	    {
4172 	      gimple *stmt = store->stmt;
4173 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4174 	      gsi_remove (&gsi, true);
4175 	      if (stmt != merged_store->last_stmt)
4176 		{
4177 		  unlink_stmt_vdef (stmt);
4178 		  release_defs (stmt);
4179 		}
4180 	    }
4181 	  ret = true;
4182 	}
4183     }
4184   if (ret && dump_file)
4185     fprintf (dump_file, "Merging successful!\n");
4186 
4187   return ret;
4188 }
4189 
4190 /* Coalesce the store_immediate_info objects recorded against the base object
4191    BASE in the first phase and output them.
4192    Delete the allocated structures.
4193    Return true if any changes were made.  */
4194 
4195 bool
4196 imm_store_chain_info::terminate_and_process_chain ()
4197 {
4198   /* Process store chain.  */
4199   bool ret = false;
4200   if (m_store_info.length () > 1)
4201     {
4202       ret = coalesce_immediate_stores ();
4203       if (ret)
4204 	ret = output_merged_stores ();
4205     }
4206 
4207   /* Delete all the entries we allocated ourselves.  */
4208   store_immediate_info *info;
4209   unsigned int i;
4210   FOR_EACH_VEC_ELT (m_store_info, i, info)
4211     delete info;
4212 
4213   merged_store_group *merged_info;
4214   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4215     delete merged_info;
4216 
4217   return ret;
4218 }
4219 
4220 /* Return true iff LHS is a destination potentially interesting for
4221    store merging.  In practice these are the codes that get_inner_reference
4222    can process.  */
4223 
4224 static bool
4225 lhs_valid_for_store_merging_p (tree lhs)
4226 {
4227   tree_code code = TREE_CODE (lhs);
4228 
4229   if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
4230       || code == COMPONENT_REF || code == BIT_FIELD_REF)
4231     return true;
4232 
4233   return false;
4234 }
4235 
4236 /* Return true if the tree RHS is a constant we want to consider
4237    during store merging.  In practice accept all codes that
4238    native_encode_expr accepts.  */
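/* For example, an INTEGER_CST such as 0x1234 or a REAL_CST such as 1.0
   can be byte-encoded by native_encode_expr and is therefore accepted,
   whereas a constant whose mode does not have a fixed size on the target
   is rejected.  */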
4239 
4240 static bool
4241 rhs_valid_for_store_merging_p (tree rhs)
4242 {
4243   unsigned HOST_WIDE_INT size;
4244   return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4245 	  && native_encode_expr (rhs, NULL, size) != 0);
4246 }
4247 
4248 /* If MEM is a memory reference usable for store merging (either as
4249    store destination or for loads), return the non-NULL base_addr
4250    and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4251    Otherwise return NULL_TREE; *PBITSIZE is still valid even in that
4252    case.  */
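/* Hypothetical example of the canonicalization done below: when
   get_inner_reference returns a base of the form MEM_REF [p_1 + 4B] with
   a bit position of 8, the MEM_REF is stripped so that the pointer p_1
   itself becomes the base address and the 4-byte offset is folded into
   the bit position, i.e. *PBITPOS becomes 40.  */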
4253 
4254 static tree
4255 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4256 			     poly_uint64 *pbitpos,
4257 			     poly_uint64 *pbitregion_start,
4258 			     poly_uint64 *pbitregion_end)
4259 {
4260   poly_int64 bitsize, bitpos;
4261   poly_uint64 bitregion_start = 0, bitregion_end = 0;
4262   machine_mode mode;
4263   int unsignedp = 0, reversep = 0, volatilep = 0;
4264   tree offset;
4265   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4266 					&unsignedp, &reversep, &volatilep);
4267   *pbitsize = bitsize;
4268   if (known_eq (bitsize, 0))
4269     return NULL_TREE;
4270 
4271   if (TREE_CODE (mem) == COMPONENT_REF
4272       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4273     {
4274       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4275       if (maybe_ne (bitregion_end, 0U))
4276 	bitregion_end += 1;
4277     }
4278 
4279   if (reversep)
4280     return NULL_TREE;
4281 
4282   /* We do not want to rewrite TARGET_MEM_REFs.  */
4283   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4284     return NULL_TREE;
4285   /* In some cases get_inner_reference may return a
4286      MEM_REF [ptr + byteoffset].  For the purposes of this pass
4287      canonicalize the base_addr to MEM_REF [ptr] and take
4288      byteoffset into account in the bitpos.  This occurs in
4289      PR 23684 and this way we can catch more chains.  */
4290   else if (TREE_CODE (base_addr) == MEM_REF)
4291     {
4292       poly_offset_int byte_off = mem_ref_offset (base_addr);
4293       poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4294       bit_off += bitpos;
4295       if (known_ge (bit_off, 0) && bit_off.to_shwi (&bitpos))
4296 	{
4297 	  if (maybe_ne (bitregion_end, 0U))
4298 	    {
4299 	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
4300 	      bit_off += bitregion_start;
4301 	      if (bit_off.to_uhwi (&bitregion_start))
4302 		{
4303 		  bit_off = byte_off << LOG2_BITS_PER_UNIT;
4304 		  bit_off += bitregion_end;
4305 		  if (!bit_off.to_uhwi (&bitregion_end))
4306 		    bitregion_end = 0;
4307 		}
4308 	      else
4309 		bitregion_end = 0;
4310 	    }
4311 	}
4312       else
4313 	return NULL_TREE;
4314       base_addr = TREE_OPERAND (base_addr, 0);
4315     }
4316   /* get_inner_reference returns the base object, get at its
4317      address now.  */
4318   else
4319     {
4320       if (maybe_lt (bitpos, 0))
4321 	return NULL_TREE;
4322       base_addr = build_fold_addr_expr (base_addr);
4323     }
4324 
4325   if (known_eq (bitregion_end, 0U))
4326     {
4327       bitregion_start = round_down_to_byte_boundary (bitpos);
4328       bitregion_end = bitpos;
4329       bitregion_end = round_up_to_byte_boundary (bitregion_end + bitsize);
4330     }
4331 
4332   if (offset != NULL_TREE)
4333     {
4334       /* If the access is variable offset then a base decl has to be
4335 	 address-taken to be able to emit pointer-based stores to it.
4336 	 ???  We might be able to get away with re-using the original
4337 	 base up to the first variable part and then wrapping that inside
4338 	 a BIT_FIELD_REF.  */
4339       tree base = get_base_address (base_addr);
4340       if (! base
4341 	  || (DECL_P (base) && ! TREE_ADDRESSABLE (base)))
4342 	return NULL_TREE;
4343 
4344       base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4345 			  base_addr, offset);
4346     }
4347 
4348   *pbitsize = bitsize;
4349   *pbitpos = bitpos;
4350   *pbitregion_start = bitregion_start;
4351   *pbitregion_end = bitregion_end;
4352   return base_addr;
4353 }
4354 
4355 /* Return true if STMT is a load that can be used for store merging.
4356    In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
4357    BITREGION_END are properties of the corresponding store.  */
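/* A small sketch (SSA names invented):

     _1 = q_4->f;
     _2 = ~_1;
     p_6->f = _2;

   calling this on the definition of _2 recurses through the BIT_NOT_EXPR,
   records the load from q_4->f in *OP and sets OP->bit_not_p, provided the
   load has the same bitsize and a compatible bit region as the store.  */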
4358 
4359 static bool
4360 handled_load (gimple *stmt, store_operand_info *op,
4361 	      poly_uint64 bitsize, poly_uint64 bitpos,
4362 	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4363 {
4364   if (!is_gimple_assign (stmt))
4365     return false;
4366   if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4367     {
4368       tree rhs1 = gimple_assign_rhs1 (stmt);
4369       if (TREE_CODE (rhs1) == SSA_NAME
4370 	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4371 			   bitregion_start, bitregion_end))
4372 	{
4373 	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4374 	     been optimized earlier, but if allowed here, would confuse the
4375 	     multiple uses counting.  */
4376 	  if (op->bit_not_p)
4377 	    return false;
4378 	  op->bit_not_p = !op->bit_not_p;
4379 	  return true;
4380 	}
4381       return false;
4382     }
4383   if (gimple_vuse (stmt)
4384       && gimple_assign_load_p (stmt)
4385       && !stmt_can_throw_internal (cfun, stmt)
4386       && !gimple_has_volatile_ops (stmt))
4387     {
4388       tree mem = gimple_assign_rhs1 (stmt);
4389       op->base_addr
4390 	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4391 				       &op->bitregion_start,
4392 				       &op->bitregion_end);
4393       if (op->base_addr != NULL_TREE
4394 	  && known_eq (op->bitsize, bitsize)
4395 	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4396 	  && known_ge (op->bitpos - op->bitregion_start,
4397 		       bitpos - bitregion_start)
4398 	  && known_ge (op->bitregion_end - op->bitpos,
4399 		       bitregion_end - bitpos))
4400 	{
4401 	  op->stmt = stmt;
4402 	  op->val = mem;
4403 	  op->bit_not_p = false;
4404 	  return true;
4405 	}
4406     }
4407   return false;
4408 }
4409 
4410 /* Record the store STMT for store merging optimization if it can be
4411    optimized.  */
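/* Roughly, the classification below works as follows (field names
   invented):

     p->x = 5;            rhs_code INTEGER_CST, ops[0].val is the constant
     p->x = q->x;         rhs_code MEM_REF, ops[0] describes the load
     p->x = q->x | 0x10;  rhs_code BIT_IOR_EXPR, ops[0] the load,
			  ops[1].val the constant
     p->bitfld = var;     rhs_code BIT_INSERT_EXPR for a bit-field store
			  of a non-constant value

   Anything else is considered invalid for merging and only serves to
   terminate aliasing chains.  */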
4412 
4413 void
4414 pass_store_merging::process_store (gimple *stmt)
4415 {
4416   tree lhs = gimple_assign_lhs (stmt);
4417   tree rhs = gimple_assign_rhs1 (stmt);
4418   poly_uint64 bitsize, bitpos;
4419   poly_uint64 bitregion_start, bitregion_end;
4420   tree base_addr
4421     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4422 				   &bitregion_start, &bitregion_end);
4423   if (known_eq (bitsize, 0U))
4424     return;
4425 
4426   bool invalid = (base_addr == NULL_TREE
4427 		  || (maybe_gt (bitsize,
4428 				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4429 		      && (TREE_CODE (rhs) != INTEGER_CST)));
4430   enum tree_code rhs_code = ERROR_MARK;
4431   bool bit_not_p = false;
4432   struct symbolic_number n;
4433   gimple *ins_stmt = NULL;
4434   store_operand_info ops[2];
4435   if (invalid)
4436     ;
4437   else if (rhs_valid_for_store_merging_p (rhs))
4438     {
4439       rhs_code = INTEGER_CST;
4440       ops[0].val = rhs;
4441     }
4442   else if (TREE_CODE (rhs) != SSA_NAME)
4443     invalid = true;
4444   else
4445     {
4446       gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4447       if (!is_gimple_assign (def_stmt))
4448 	invalid = true;
4449       else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4450 			     bitregion_start, bitregion_end))
4451 	rhs_code = MEM_REF;
4452       else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4453 	{
4454 	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
4455 	  if (TREE_CODE (rhs1) == SSA_NAME
4456 	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4457 	    {
4458 	      bit_not_p = true;
4459 	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
4460 	    }
4461 	}
4462 
4463       if (rhs_code == ERROR_MARK && !invalid)
4464 	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4465 	  {
4466 	  case BIT_AND_EXPR:
4467 	  case BIT_IOR_EXPR:
4468 	  case BIT_XOR_EXPR:
4469 	    tree rhs1, rhs2;
4470 	    rhs1 = gimple_assign_rhs1 (def_stmt);
4471 	    rhs2 = gimple_assign_rhs2 (def_stmt);
4472 	    invalid = true;
4473 	    if (TREE_CODE (rhs1) != SSA_NAME)
4474 	      break;
4475 	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4476 	    if (!is_gimple_assign (def_stmt1)
4477 		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4478 				  bitregion_start, bitregion_end))
4479 	      break;
4480 	    if (rhs_valid_for_store_merging_p (rhs2))
4481 	      ops[1].val = rhs2;
4482 	    else if (TREE_CODE (rhs2) != SSA_NAME)
4483 	      break;
4484 	    else
4485 	      {
4486 		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4487 		if (!is_gimple_assign (def_stmt2))
4488 		  break;
4489 		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4490 					bitregion_start, bitregion_end))
4491 		  break;
4492 	      }
4493 	    invalid = false;
4494 	    break;
4495 	  default:
4496 	    invalid = true;
4497 	    break;
4498 	  }
4499 
4500       unsigned HOST_WIDE_INT const_bitsize;
4501       if (bitsize.is_constant (&const_bitsize)
4502 	  && (const_bitsize % BITS_PER_UNIT) == 0
4503 	  && const_bitsize <= 64
4504 	  && multiple_p (bitpos, BITS_PER_UNIT))
4505 	{
4506 	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4507 	  if (ins_stmt)
4508 	    {
4509 	      uint64_t nn = n.n;
4510 	      for (unsigned HOST_WIDE_INT i = 0;
4511 		   i < const_bitsize;
4512 		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4513 		if ((nn & MARKER_MASK) == 0
4514 		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4515 		  {
4516 		    ins_stmt = NULL;
4517 		    break;
4518 		  }
4519 	      if (ins_stmt)
4520 		{
4521 		  if (invalid)
4522 		    {
4523 		      rhs_code = LROTATE_EXPR;
4524 		      ops[0].base_addr = NULL_TREE;
4525 		      ops[1].base_addr = NULL_TREE;
4526 		    }
4527 		  invalid = false;
4528 		}
4529 	    }
4530 	}
4531 
4532       if (invalid
4533 	  && bitsize.is_constant (&const_bitsize)
4534 	  && ((const_bitsize % BITS_PER_UNIT) != 0
4535 	      || !multiple_p (bitpos, BITS_PER_UNIT))
4536 	  && const_bitsize <= 64)
4537 	{
4538 	  /* Bypass a conversion to the bit-field type.  */
4539 	  if (!bit_not_p
4540 	      && is_gimple_assign (def_stmt)
4541 	      && CONVERT_EXPR_CODE_P (rhs_code))
4542 	    {
4543 	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
4544 	      if (TREE_CODE (rhs1) == SSA_NAME
4545 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4546 		rhs = rhs1;
4547 	    }
4548 	  rhs_code = BIT_INSERT_EXPR;
4549 	  bit_not_p = false;
4550 	  ops[0].val = rhs;
4551 	  ops[0].base_addr = NULL_TREE;
4552 	  ops[1].base_addr = NULL_TREE;
4553 	  invalid = false;
4554 	}
4555     }
4556 
4557   unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4558   unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4559   if (invalid
4560       || !bitsize.is_constant (&const_bitsize)
4561       || !bitpos.is_constant (&const_bitpos)
4562       || !bitregion_start.is_constant (&const_bitregion_start)
4563       || !bitregion_end.is_constant (&const_bitregion_end))
4564     {
4565       terminate_all_aliasing_chains (NULL, stmt);
4566       return;
4567     }
4568 
4569   if (!ins_stmt)
4570     memset (&n, 0, sizeof (n));
4571 
4572   struct imm_store_chain_info **chain_info = NULL;
4573   if (base_addr)
4574     chain_info = m_stores.get (base_addr);
4575 
4576   store_immediate_info *info;
4577   if (chain_info)
4578     {
4579       unsigned int ord = (*chain_info)->m_store_info.length ();
4580       info = new store_immediate_info (const_bitsize, const_bitpos,
4581 				       const_bitregion_start,
4582 				       const_bitregion_end,
4583 				       stmt, ord, rhs_code, n, ins_stmt,
4584 				       bit_not_p, ops[0], ops[1]);
4585       if (dump_file && (dump_flags & TDF_DETAILS))
4586 	{
4587 	  fprintf (dump_file, "Recording immediate store from stmt:\n");
4588 	  print_gimple_stmt (dump_file, stmt, 0);
4589 	}
4590       (*chain_info)->m_store_info.safe_push (info);
4591       terminate_all_aliasing_chains (chain_info, stmt);
4592       /* If we reach the limit of stores to merge in a chain terminate and
4593 	 process the chain now.  */
4594       if ((*chain_info)->m_store_info.length ()
4595 	  == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
4596 	{
4597 	  if (dump_file && (dump_flags & TDF_DETAILS))
4598 	    fprintf (dump_file,
4599 		     "Reached maximum number of statements to merge:\n");
4600 	  terminate_and_release_chain (*chain_info);
4601 	}
4602       return;
4603     }
4604 
4605   /* Store aliases any existing chain?  */
4606   terminate_all_aliasing_chains (NULL, stmt);
4607   /* Start a new chain.  */
4608   struct imm_store_chain_info *new_chain
4609     = new imm_store_chain_info (m_stores_head, base_addr);
4610   info = new store_immediate_info (const_bitsize, const_bitpos,
4611 				   const_bitregion_start,
4612 				   const_bitregion_end,
4613 				   stmt, 0, rhs_code, n, ins_stmt,
4614 				   bit_not_p, ops[0], ops[1]);
4615   new_chain->m_store_info.safe_push (info);
4616   m_stores.put (base_addr, new_chain);
4617   if (dump_file && (dump_flags & TDF_DETAILS))
4618     {
4619       fprintf (dump_file, "Starting new chain with statement:\n");
4620       print_gimple_stmt (dump_file, stmt, 0);
4621       fprintf (dump_file, "The base object is:\n");
4622       print_generic_expr (dump_file, base_addr);
4623       fprintf (dump_file, "\n");
4624     }
4625 }
4626 
4627 /* Entry point for the pass.  Go over each basic block recording chains of
4628    immediate stores.  Upon encountering a terminating statement (as defined
4629    by stmt_terminates_chain_p), process the recorded stores and emit the widened
4630    variants.  */
4631 
4632 unsigned int
4633 pass_store_merging::execute (function *fun)
4634 {
4635   basic_block bb;
4636   hash_set<gimple *> orig_stmts;
4637 
4638   calculate_dominance_info (CDI_DOMINATORS);
4639 
4640   FOR_EACH_BB_FN (bb, fun)
4641     {
4642       gimple_stmt_iterator gsi;
4643       unsigned HOST_WIDE_INT num_statements = 0;
4644       /* Quickly count the non-debug statements in this block: if there
4645 	 are fewer than two real statements there is nothing to merge, so
4646 	 the block can be skipped.  */
4647       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4648 	{
4649 	  if (is_gimple_debug (gsi_stmt (gsi)))
4650 	    continue;
4651 
4652 	  if (++num_statements >= 2)
4653 	    break;
4654 	}
4655 
4656       if (num_statements < 2)
4657 	continue;
4658 
4659       if (dump_file && (dump_flags & TDF_DETAILS))
4660 	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
4661 
4662       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4663 	{
4664 	  gimple *stmt = gsi_stmt (gsi);
4665 
4666 	  if (is_gimple_debug (stmt))
4667 	    continue;
4668 
4669 	  if (gimple_has_volatile_ops (stmt))
4670 	    {
4671 	      /* Terminate all chains.  */
4672 	      if (dump_file && (dump_flags & TDF_DETAILS))
4673 		fprintf (dump_file, "Volatile access terminates "
4674 				    "all chains\n");
4675 	      terminate_and_process_all_chains ();
4676 	      continue;
4677 	    }
4678 
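	  /* Only single assignments that write memory, cannot throw
	     internally and have a destination we can express as a store to
	     a handled base are candidates for merging; anything else may
	     have to terminate the chains it aliases.  */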
4679 	  if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
4680 	      && !stmt_can_throw_internal (cfun, stmt)
4681 	      && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
4682 	    process_store (stmt);
4683 	  else
4684 	    terminate_all_aliasing_chains (NULL, stmt);
4685 	}
4686       terminate_and_process_all_chains ();
4687     }
4688   return 0;
4689 }
4690 
4691 } // anon namespace
4692 
4693 /* Construct and return a store merging pass object.  */
4694 
4695 gimple_opt_pass *
4696 make_pass_store_merging (gcc::context *ctxt)
4697 {
4698   return new pass_store_merging (ctxt);
4699 }
4700 
4701 #if CHECKING_P
4702 
4703 namespace selftest {
4704 
4705 /* Selftests for store merging helpers.  */
4706 
4707 /* Assert that all elements of the byte arrays X and Y, both of length N,
4708    are equal.  */
4709 
4710 static void
4711 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
4712 {
4713   for (unsigned int i = 0; i < n; i++)
4714     {
4715       if (x[i] != y[i])
4716 	{
4717 	  fprintf (stderr, "Arrays do not match.  X:\n");
4718 	  dump_char_array (stderr, x, n);
4719 	  fprintf (stderr, "Y:\n");
4720 	  dump_char_array (stderr, y, n);
4721 	}
4722       ASSERT_EQ (x[i], y[i]);
4723     }
4724 }
4725 
4726 /* Test that shift_bytes_in_array correctly carries bits across byte
4727    boundaries.  */
4728 
4729 static void
4730 verify_shift_bytes_in_array (void)
4731 {
4732    /* byte 1   | byte 0
4733       00011111 | 11100000.  */
4734   unsigned char orig[2] = { 0xe0, 0x1f };
4735   unsigned char in[2];
4736   memcpy (in, orig, sizeof orig);
4737 
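  /* With byte 0 as the least significant byte the array holds the 16-bit
     value 0x1fe0; shifting left by 2 gives 0x7f80, i.e. { 0x80, 0x7f },
     with the top two bits of byte 0 carried into byte 1.  */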
4738   unsigned char expected[2] = { 0x80, 0x7f };
4739   shift_bytes_in_array (in, sizeof (in), 2);
4740   verify_array_eq (in, expected, sizeof (in));
4741 
4742   memcpy (in, orig, sizeof orig);
4743   memcpy (expected, orig, sizeof orig);
4744   /* Check that shifting by zero doesn't change anything.  */
4745   shift_bytes_in_array (in, sizeof (in), 0);
4746   verify_array_eq (in, expected, sizeof (in));
4747 
4748 }
4749 
4750 /* Test that shift_bytes_in_array_right correctly carries bits across byte
4751    boundaries.  */
4752 
4753 static void
4754 verify_shift_bytes_in_array_right (void)
4755 {
4756    /* byte 0   | byte 1   (byte 0 is the most significant byte here)
4757       00011111 | 11100000.  */
4758   unsigned char orig[2] = { 0x1f, 0xe0};
4759   unsigned char in[2];
4760   memcpy (in, orig, sizeof orig);
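  /* shift_bytes_in_array_right treats byte 0 as the most significant byte,
     so the array holds 0x1fe0; shifting right by 2 gives 0x07f8, i.e.
     { 0x07, 0xf8 }, with the low two bits of byte 0 carried into byte 1.  */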
4761   unsigned char expected[2] = { 0x07, 0xf8};
4762   shift_bytes_in_array_right (in, sizeof (in), 2);
4763   verify_array_eq (in, expected, sizeof (in));
4764 
4765   memcpy (in, orig, sizeof orig);
4766   memcpy (expected, orig, sizeof orig);
4767   /* Check that shifting by zero doesn't change anything.  */
4768   shift_bytes_in_array_right (in, sizeof (in), 0);
4769   verify_array_eq (in, expected, sizeof (in));
4770 }
4771 
4772 /* Test that clear_bit_region clears exactly the bits asked for and
4773    nothing more.  */
4774 
4775 static void
4776 verify_clear_bit_region (void)
4777 {
4778   /* Start with all bits set and test clearing various patterns in them.  */
4779   unsigned char orig[3] = { 0xff, 0xff, 0xff};
4780   unsigned char in[3];
4781   unsigned char expected[3];
4782   memcpy (in, orig, sizeof in);
4783 
4784   /* Check zeroing out all the bits.  */
4785   clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
4786   expected[0] = expected[1] = expected[2] = 0;
4787   verify_array_eq (in, expected, sizeof in);
4788 
4789   memcpy (in, orig, sizeof in);
4790   /* Leave the first and last bits intact.  */
4791   clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
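  /* Bits are numbered from the least significant bit of byte 0, so clearing
     the 22 bits starting at bit 1 leaves only bit 0 of byte 0 and bit 7 of
     byte 2 set.  */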
4792   expected[0] = 0x1;
4793   expected[1] = 0;
4794   expected[2] = 0x80;
4795   verify_array_eq (in, expected, sizeof in);
4796 }
4797 
4798 /* Test that clear_bit_region_be clears exactly the bits asked for and
4799    nothing more.  */
4800 
4801 static void
4802 verify_clear_bit_region_be (void)
4803 {
4804   /* Start with all bits set and test clearing various patterns in them.  */
4805   unsigned char orig[3] = { 0xff, 0xff, 0xff};
4806   unsigned char in[3];
4807   unsigned char expected[3];
4808   memcpy (in, orig, sizeof in);
4809 
4810   /* Check zeroing out all the bits.  */
4811   clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
4812   expected[0] = expected[1] = expected[2] = 0;
4813   verify_array_eq (in, expected, sizeof in);
4814 
4815   memcpy (in, orig, sizeof in);
4816   /* Leave the first and last bits intact.  */
4817   clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
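  /* For the big-endian variant, bit BITS_PER_UNIT - 1 is the most
     significant bit of byte 0, so clearing the 22 bits starting at bit
     BITS_PER_UNIT - 2 leaves only the most significant bit of byte 0 and
     the least significant bit of byte 2 set.  */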
4818   expected[0] = 0x80;
4819   expected[1] = 0;
4820   expected[2] = 0x1;
4821   verify_array_eq (in, expected, sizeof in);
4822 }
4823 
4824 
4825 /* Run all of the selftests within this file.  */
4826 
4827 void
4828 store_merging_c_tests (void)
4829 {
4830   verify_shift_bytes_in_array ();
4831   verify_shift_bytes_in_array_right ();
4832   verify_clear_bit_region ();
4833   verify_clear_bit_region_be ();
4834 }
4835 
4836 } // namespace selftest
4837 #endif /* CHECKING_P.  */
4838