1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this
4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #include "jit/ShuffleAnalysis.h"
7 #include "jit/MIR.h"
8 
9 using namespace js;
10 using namespace jit;
11 
12 using mozilla::Maybe;
13 using mozilla::Nothing;
14 using mozilla::Some;
15 
16 #ifdef ENABLE_WASM_SIMD
17 
18 // Specialization analysis for SIMD operations.  This is still x86-centric but
19 // generalizes fairly easily to other architectures.
20 
21 // Optimization of v8x16.shuffle.  The general byte shuffle+blend is very
22 // expensive (equivalent to at least a dozen instructions), and we want to avoid
23 // that if we can.  So look for special cases - there are many.
24 //
25 // The strategy is to sort the operation into one of three buckets depending
26 // on the shuffle pattern and inputs:
27 //
28 //  - single operand; shuffles on these values are rotations, reversals,
29 //    transpositions, and general permutations
30 //  - single-operand-with-interesting-constant (especially zero); shuffles on
31 //    these values are often byte shift or scatter operations
32 //  - dual operand; shuffles on these operations are blends, catenated
33 //    shifts, and (in the worst case) general shuffle+blends
34 //
35 // We're not trying to solve the general problem, only to lower reasonably
36 // expressed patterns that express common operations.  Producers that produce
37 // dense and convoluted patterns will end up with the general byte shuffle.
38 // Producers that produce simpler patterns that easily map to hardware will
39 // get faster code.
40 //
41 // In particular, these matchers do not try to combine transformations, so a
42 // shuffle that optimally is lowered to rotate + permute32x4 + rotate, say, is
43 // usually going to end up as a general byte shuffle.
44 
45 // Reduce a 0..31 byte mask to a 0..15 word mask if possible and if so return
46 // true, updating *control.
ByteMaskToWordMask(SimdConstant * control)47 static bool ByteMaskToWordMask(SimdConstant* control) {
48   const SimdConstant::I8x16& lanes = control->asInt8x16();
49   int16_t controlWords[8];
50   for (int i = 0; i < 16; i += 2) {
51     if (!((lanes[i] & 1) == 0 && lanes[i + 1] == lanes[i] + 1)) {
52       return false;
53     }
54     controlWords[i / 2] = int16_t(lanes[i] / 2);
55   }
56   *control = SimdConstant::CreateX8(controlWords);
57   return true;
58 }
59 
60 // Reduce a 0..31 byte mask to a 0..7 dword mask if possible and if so return
61 // true, updating *control.
ByteMaskToDWordMask(SimdConstant * control)62 static bool ByteMaskToDWordMask(SimdConstant* control) {
63   const SimdConstant::I8x16& lanes = control->asInt8x16();
64   int32_t controlDWords[4];
65   for (int i = 0; i < 16; i += 4) {
66     if (!((lanes[i] & 3) == 0 && lanes[i + 1] == lanes[i] + 1 &&
67           lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3)) {
68       return false;
69     }
70     controlDWords[i / 4] = lanes[i] / 4;
71   }
72   *control = SimdConstant::CreateX4(controlDWords);
73   return true;
74 }
75 
76 // Reduce a 0..31 byte mask to a 0..3 qword mask if possible and if so return
77 // true, updating *control.
ByteMaskToQWordMask(SimdConstant * control)78 static bool ByteMaskToQWordMask(SimdConstant* control) {
79   const SimdConstant::I8x16& lanes = control->asInt8x16();
80   int64_t controlQWords[2];
81   for (int i = 0; i < 16; i += 8) {
82     if (!((lanes[i] & 7) == 0 && lanes[i + 1] == lanes[i] + 1 &&
83           lanes[i + 2] == lanes[i] + 2 && lanes[i + 3] == lanes[i] + 3 &&
84           lanes[i + 4] == lanes[i] + 4 && lanes[i + 5] == lanes[i] + 5 &&
85           lanes[i + 6] == lanes[i] + 6 && lanes[i + 7] == lanes[i] + 7)) {
86       return false;
87     }
88     controlQWords[i / 8] = lanes[i] / 8;
89   }
90   *control = SimdConstant::CreateX2(controlQWords);
91   return true;
92 }
93 
94 // Skip across consecutive values in lanes starting at i, returning the index
95 // after the last element.  Lane values must be <= len-1 ("masked").
96 //
97 // Since every element is a 1-element run, the return value is never the same as
98 // the starting i.
99 template <typename T>
ScanIncreasingMasked(const T * lanes,int i)100 static int ScanIncreasingMasked(const T* lanes, int i) {
101   int len = int(16 / sizeof(T));
102   MOZ_ASSERT(i < len);
103   MOZ_ASSERT(lanes[i] <= len - 1);
104   i++;
105   while (i < len && lanes[i] == lanes[i - 1] + 1) {
106     MOZ_ASSERT(lanes[i] <= len - 1);
107     i++;
108   }
109   return i;
110 }
111 
112 // Skip across consecutive values in lanes starting at i, returning the index
113 // after the last element.  Lane values must be <= len*2-1 ("unmasked"); the
114 // values len-1 and len are not considered consecutive.
115 //
116 // Since every element is a 1-element run, the return value is never the same as
117 // the starting i.
118 template <typename T>
ScanIncreasingUnmasked(const T * lanes,int i)119 static int ScanIncreasingUnmasked(const T* lanes, int i) {
120   int len = int(16 / sizeof(T));
121   MOZ_ASSERT(i < len);
122   if (lanes[i] < len) {
123     i++;
124     while (i < len && lanes[i] < len && lanes[i - 1] == lanes[i] - 1) {
125       i++;
126     }
127   } else {
128     i++;
129     while (i < len && lanes[i] >= len && lanes[i - 1] == lanes[i] - 1) {
130       i++;
131     }
132   }
133   return i;
134 }
135 
136 // Skip lanes that equal v starting at i, returning the index just beyond the
137 // last of those.  There is no requirement that the initial lanes[i] == v.
138 template <typename T>
ScanConstant(const T * lanes,int v,int i)139 static int ScanConstant(const T* lanes, int v, int i) {
140   int len = int(16 / sizeof(T));
141   MOZ_ASSERT(i <= len);
142   while (i < len && lanes[i] == v) {
143     i++;
144   }
145   return i;
146 }
147 
// Fold lane indices that denote rhs elements onto the corresponding lhs
// elements by keeping only the low bits of each index (index & (len-1), where
// len is the number of lanes of type T in a 16-byte vector).
template <typename T>
static void MaskLanes(T* result, const T* input) {
  const int laneCount = int(16 / sizeof(T));
  const int indexMask = laneCount - 1;
  for (int lane = 0; lane < laneCount; lane++) {
    result[lane] = input[lane] & indexMask;
  }
}
156 
157 // Apply a transformation to each lane value.
158 template <typename T>
MapLanes(T * result,const T * input,int (* f)(int))159 static void MapLanes(T* result, const T* input, int (*f)(int)) {
160   // Hazard analysis trips on "IndirectCall: f" error.
161   // Suppress the check -- `f` is expected to be trivial here.
162   JS::AutoSuppressGCAnalysis nogc;
163 
164   int len = int(16 / sizeof(T));
165   for (int i = 0; i < len; i++) {
166     result[i] = f(input[i]);
167   }
168 }
169 
// Recognize an identity permutation.  The lanes must already be masked: the
// test is that one increasing run covers the whole vector, and with masked
// values such a run can only be 0, 1, ..., len-1.
template <typename T>
static bool IsIdentity(const T* lanes) {
  const int laneCount = int(16 / sizeof(T));
  const int runEnd = ScanIncreasingMasked(lanes, 0);
  return runEnd == laneCount;
}
175 
// Recognize a slice of an identity permutation: lanes[start] must equal bias,
// and each of the following len-1 lanes must be one more than the lane before
// it.
template <typename T>
static bool IsIdentity(const T* lanes, int start, int len, int bias) {
  const T* cursor = lanes + start;
  if (*cursor != bias) {
    return false;
  }
  // Compare each lane with its successor for the rest of the slice.
  for (int remaining = len - 1; remaining > 0; remaining--, cursor++) {
    if (cursor[1] != cursor[0] + 1) {
      return false;
    }
  }
  return true;
}
190 
191 // We can permute by dwords if the mask is reducible to a dword mask, and in
192 // this case a single PSHUFD is enough.
TryPermute32x4(SimdConstant * control)193 static bool TryPermute32x4(SimdConstant* control) {
194   SimdConstant tmp = *control;
195   if (!ByteMaskToDWordMask(&tmp)) {
196     return false;
197   }
198   *control = tmp;
199   return true;
200 }
201 
202 // Can we perform a byte rotate right?  We can use PALIGNR.  The shift count is
203 // just lanes[0], and *control is unchanged.
TryRotateRight8x16(SimdConstant * control)204 static bool TryRotateRight8x16(SimdConstant* control) {
205   const SimdConstant::I8x16& lanes = control->asInt8x16();
206   // Look for the end of the first run of consecutive bytes.
207   int i = ScanIncreasingMasked(lanes, 0);
208 
209   // First run must start at a value s.t. we have a rotate if all remaining
210   // bytes are a run.
211   if (lanes[0] != 16 - i) {
212     return false;
213   }
214 
215   // If we reached the end of the vector, we're done.
216   if (i == 16) {
217     return true;
218   }
219 
220   // Second run must start at source lane zero.
221   if (lanes[i] != 0) {
222     return false;
223   }
224 
225   // Second run must end at the end of the lane vector.
226   return ScanIncreasingMasked(lanes, i) == 16;
227 }
228 
229 // We can permute by words if the mask is reducible to a word mask.
TryPermute16x8(SimdConstant * control)230 static bool TryPermute16x8(SimdConstant* control) {
231   SimdConstant tmp = *control;
232   if (!ByteMaskToWordMask(&tmp)) {
233     return false;
234   }
235   *control = tmp;
236   return true;
237 }
238 
239 // A single word lane is copied into all the other lanes: PSHUF*W + PSHUFD.
TryBroadcast16x8(SimdConstant * control)240 static bool TryBroadcast16x8(SimdConstant* control) {
241   SimdConstant tmp = *control;
242   if (!ByteMaskToWordMask(&tmp)) {
243     return false;
244   }
245   const SimdConstant::I16x8& lanes = tmp.asInt16x8();
246   if (ScanConstant(lanes, lanes[0], 0) < 8) {
247     return false;
248   }
249   *control = tmp;
250   return true;
251 }
252 
253 // A single byte lane is copied int all the other lanes: PUNPCK*BW + PSHUF*W +
254 // PSHUFD.
TryBroadcast8x16(SimdConstant * control)255 static bool TryBroadcast8x16(SimdConstant* control) {
256   const SimdConstant::I8x16& lanes = control->asInt8x16();
257   return ScanConstant(lanes, lanes[0], 0) >= 16;
258 }
259 
260 template <int N>
TryReverse(SimdConstant * control)261 static bool TryReverse(SimdConstant* control) {
262   const SimdConstant::I8x16& lanes = control->asInt8x16();
263   for (int i = 0; i < 16; i++) {
264     if (lanes[i] != (i ^ (N - 1))) {
265       return false;
266     }
267   }
268   return true;
269 }
270 
271 // Look for permutations of a single operand.
AnalyzePermute(SimdConstant * control)272 static SimdPermuteOp AnalyzePermute(SimdConstant* control) {
273   // Lane indices are input-agnostic for single-operand permutations.
274   SimdConstant::I8x16 controlBytes;
275   MaskLanes(controlBytes, control->asInt8x16());
276 
277   // Get rid of no-ops immediately, so nobody else needs to check.
278   if (IsIdentity(controlBytes)) {
279     return SimdPermuteOp::MOVE;
280   }
281 
282   // Default control is the masked bytes.
283   *control = SimdConstant::CreateX16(controlBytes);
284 
285   // Analysis order matters here and is architecture-dependent or even
286   // microarchitecture-dependent: ideally the cheapest implementation first.
287   // The Intel manual says that the cost of a PSHUFB is about five other
288   // operations, so make that our cutoff.
289   //
290   // Word, dword, and qword reversals are handled optimally by general permutes.
291   //
292   // Byte reversals are probably best left to PSHUFB, no alternative rendition
293   // seems to reliably go below five instructions.  (Discuss.)
294   //
295   // Word swaps within doublewords and dword swaps within quadwords are handled
296   // optimally by general permutes.
297   //
298   // Dword and qword broadcasts are handled by dword permute.
299 
300   if (TryPermute32x4(control)) {
301     return SimdPermuteOp::PERMUTE_32x4;
302   }
303   if (TryRotateRight8x16(control)) {
304     return SimdPermuteOp::ROTATE_RIGHT_8x16;
305   }
306   if (TryBroadcast16x8(control)) {
307     return SimdPermuteOp::BROADCAST_16x8;
308   }
309   if (TryPermute16x8(control)) {
310     return SimdPermuteOp::PERMUTE_16x8;
311   }
312   if (TryBroadcast8x16(control)) {
313     return SimdPermuteOp::BROADCAST_8x16;
314   }
315   if (TryReverse<2>(control)) {
316     return SimdPermuteOp::REVERSE_16x8;
317   }
318   if (TryReverse<4>(control)) {
319     return SimdPermuteOp::REVERSE_32x4;
320   }
321   if (TryReverse<8>(control)) {
322     return SimdPermuteOp::REVERSE_64x2;
323   }
324 
325   // TODO: (From v8) Unzip and transpose generally have renditions that slightly
326   // beat a general permute (three or four instructions)
327   //
328   // TODO: (From MacroAssemblerX86Shared::ShuffleX4): MOVLHPS and MOVHLPS can be
329   // used when merging two values.
330 
331   // The default operation is to permute bytes with the default control.
332   return SimdPermuteOp::PERMUTE_8x16;
333 }
334 
335 // Can we shift the bytes left or right by a constant?  A shift is a run of
336 // lanes from the rhs (which is zero) on one end and a run of values from the
337 // lhs on the other end.
TryShift8x16(SimdConstant * control)338 static Maybe<SimdPermuteOp> TryShift8x16(SimdConstant* control) {
339   const SimdConstant::I8x16& lanes = control->asInt8x16();
340 
341   // Represent all zero lanes by 16
342   SimdConstant::I8x16 zeroesMasked;
343   MapLanes(zeroesMasked, lanes, [](int x) -> int { return x >= 16 ? 16 : x; });
344 
345   int i = ScanConstant(zeroesMasked, 16, 0);
346   int shiftLeft = i;
347   if (shiftLeft > 0 && lanes[shiftLeft] != 0) {
348     return Nothing();
349   }
350 
351   i = ScanIncreasingUnmasked(zeroesMasked, i);
352   int shiftRight = 16 - i;
353   if (shiftRight > 0 && lanes[i - 1] != 15) {
354     return Nothing();
355   }
356 
357   i = ScanConstant(zeroesMasked, 16, i);
358   if (i < 16 || (shiftRight > 0 && shiftLeft > 0) ||
359       (shiftRight == 0 && shiftLeft == 0)) {
360     return Nothing();
361   }
362 
363   if (shiftRight) {
364     *control = SimdConstant::SplatX16((int8_t)shiftRight);
365     return Some(SimdPermuteOp::SHIFT_RIGHT_8x16);
366   }
367   *control = SimdConstant::SplatX16((int8_t)shiftLeft);
368   return Some(SimdPermuteOp::SHIFT_LEFT_8x16);
369 }
370 
AnalyzeShuffleWithZero(SimdConstant * control)371 static Maybe<SimdPermuteOp> AnalyzeShuffleWithZero(SimdConstant* control) {
372   Maybe<SimdPermuteOp> op;
373   op = TryShift8x16(control);
374   if (op) {
375     return op;
376   }
377 
378   // TODO: Optimization opportunity? A byte-blend-with-zero is just a CONST;
379   // PAND.  This may beat the general byte blend code below.
380   return Nothing();
381 }
382 
383 // Concat: if the result is the suffix (high bytes) of the rhs in front of a
384 // prefix (low bytes) of the lhs then this is PALIGNR; ditto if the operands are
385 // swapped.
TryConcatRightShift8x16(SimdConstant * control,bool * swapOperands)386 static Maybe<SimdShuffleOp> TryConcatRightShift8x16(SimdConstant* control,
387                                                     bool* swapOperands) {
388   const SimdConstant::I8x16& lanes = control->asInt8x16();
389   int i = ScanIncreasingUnmasked(lanes, 0);
390   MOZ_ASSERT(i < 16, "Single-operand run should have been handled elswhere");
391   // First run must end with 15 % 16
392   if ((lanes[i - 1] & 15) != 15) {
393     return Nothing();
394   }
395   // Second run must start with 0 % 16
396   if ((lanes[i] & 15) != 0) {
397     return Nothing();
398   }
399   // The two runs must come from different inputs
400   if ((lanes[i] & 16) == (lanes[i - 1] & 16)) {
401     return Nothing();
402   }
403   int suffixLength = i;
404 
405   i = ScanIncreasingUnmasked(lanes, i);
406   // Must end at the left end
407   if (i != 16) {
408     return Nothing();
409   }
410 
411   // If the suffix is from the lhs then swap the operands
412   if (lanes[0] < 16) {
413     *swapOperands = !*swapOperands;
414   }
415   *control = SimdConstant::SplatX16((int8_t)suffixLength);
416   return Some(SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16);
417 }
418 
419 // Blend words: if we pick words from both operands without a pattern but all
420 // the input words stay in their position then this is PBLENDW (immediate mask);
421 // this also handles all larger sizes on x64.
TryBlendInt16x8(SimdConstant * control)422 static Maybe<SimdShuffleOp> TryBlendInt16x8(SimdConstant* control) {
423   SimdConstant tmp(*control);
424   if (!ByteMaskToWordMask(&tmp)) {
425     return Nothing();
426   }
427   SimdConstant::I16x8 masked;
428   MaskLanes(masked, tmp.asInt16x8());
429   if (!IsIdentity(masked)) {
430     return Nothing();
431   }
432   SimdConstant::I16x8 mapped;
433   MapLanes(mapped, tmp.asInt16x8(),
434            [](int x) -> int { return x < 8 ? 0 : -1; });
435   *control = SimdConstant::CreateX8(mapped);
436   return Some(SimdShuffleOp::BLEND_16x8);
437 }
438 
439 // Blend bytes: if we pick bytes ditto then this is a byte blend, which can be
440 // handled with a CONST, PAND, PANDNOT, and POR.
441 //
442 // TODO: Optimization opportunity? If we pick all but one lanes from one with at
443 // most one from the other then it could be a MOV + PEXRB + PINSRB (also if this
444 // element is not in its source location).
TryBlendInt8x16(SimdConstant * control)445 static Maybe<SimdShuffleOp> TryBlendInt8x16(SimdConstant* control) {
446   SimdConstant::I8x16 masked;
447   MaskLanes(masked, control->asInt8x16());
448   if (!IsIdentity(masked)) {
449     return Nothing();
450   }
451   SimdConstant::I8x16 mapped;
452   MapLanes(mapped, control->asInt8x16(),
453            [](int x) -> int { return x < 16 ? 0 : -1; });
454   *control = SimdConstant::CreateX16(mapped);
455   return Some(SimdShuffleOp::BLEND_8x16);
456 }
457 
// True if the first 2*len lanes alternate between two increasing sequences:
// even positions hold lhs, lhs+1, ... and odd positions hold rhs, rhs+1, ...
template <typename T>
static bool MatchInterleave(const T* lanes, int lhs, int rhs, int len) {
  for (int pair = 0; pair < len; pair++) {
    const T* entry = lanes + pair * 2;
    const bool matches = entry[0] == lhs + pair && entry[1] == rhs + pair;
    if (!matches) {
      return false;
    }
  }
  return true;
}
467 
468 // Unpack/interleave:
469 //  - if we interleave the low (bytes/words/doublewords) of the inputs into
470 //    the output then this is UNPCKL*W (possibly with a swap of operands).
471 //  - if we interleave the high ditto then it is UNPCKH*W (ditto)
472 template <typename T>
TryInterleave(const T * lanes,int lhs,int rhs,bool * swapOperands,SimdShuffleOp lowOp,SimdShuffleOp highOp)473 static Maybe<SimdShuffleOp> TryInterleave(const T* lanes, int lhs, int rhs,
474                                           bool* swapOperands,
475                                           SimdShuffleOp lowOp,
476                                           SimdShuffleOp highOp) {
477   int len = int(32 / (sizeof(T) * 4));
478   if (MatchInterleave(lanes, lhs, rhs, len)) {
479     return Some(lowOp);
480   }
481   if (MatchInterleave(lanes, rhs, lhs, len)) {
482     *swapOperands = !*swapOperands;
483     return Some(lowOp);
484   }
485   if (MatchInterleave(lanes, lhs + len, rhs + len, len)) {
486     return Some(highOp);
487   }
488   if (MatchInterleave(lanes, rhs + len, lhs + len, len)) {
489     *swapOperands = !*swapOperands;
490     return Some(highOp);
491   }
492   return Nothing();
493 }
494 
TryInterleave64x2(SimdConstant * control,bool * swapOperands)495 static Maybe<SimdShuffleOp> TryInterleave64x2(SimdConstant* control,
496                                               bool* swapOperands) {
497   SimdConstant tmp = *control;
498   if (!ByteMaskToQWordMask(&tmp)) {
499     return Nothing();
500   }
501   const SimdConstant::I64x2& lanes = tmp.asInt64x2();
502   return TryInterleave(lanes, 0, 2, swapOperands,
503                        SimdShuffleOp::INTERLEAVE_LOW_64x2,
504                        SimdShuffleOp::INTERLEAVE_HIGH_64x2);
505 }
506 
TryInterleave32x4(SimdConstant * control,bool * swapOperands)507 static Maybe<SimdShuffleOp> TryInterleave32x4(SimdConstant* control,
508                                               bool* swapOperands) {
509   SimdConstant tmp = *control;
510   if (!ByteMaskToDWordMask(&tmp)) {
511     return Nothing();
512   }
513   const SimdConstant::I32x4& lanes = tmp.asInt32x4();
514   return TryInterleave(lanes, 0, 4, swapOperands,
515                        SimdShuffleOp::INTERLEAVE_LOW_32x4,
516                        SimdShuffleOp::INTERLEAVE_HIGH_32x4);
517 }
518 
TryInterleave16x8(SimdConstant * control,bool * swapOperands)519 static Maybe<SimdShuffleOp> TryInterleave16x8(SimdConstant* control,
520                                               bool* swapOperands) {
521   SimdConstant tmp = *control;
522   if (!ByteMaskToWordMask(&tmp)) {
523     return Nothing();
524   }
525   const SimdConstant::I16x8& lanes = tmp.asInt16x8();
526   return TryInterleave(lanes, 0, 8, swapOperands,
527                        SimdShuffleOp::INTERLEAVE_LOW_16x8,
528                        SimdShuffleOp::INTERLEAVE_HIGH_16x8);
529 }
530 
TryInterleave8x16(SimdConstant * control,bool * swapOperands)531 static Maybe<SimdShuffleOp> TryInterleave8x16(SimdConstant* control,
532                                               bool* swapOperands) {
533   const SimdConstant::I8x16& lanes = control->asInt8x16();
534   return TryInterleave(lanes, 0, 16, swapOperands,
535                        SimdShuffleOp::INTERLEAVE_LOW_8x16,
536                        SimdShuffleOp::INTERLEAVE_HIGH_8x16);
537 }
538 
AnalyzeTwoArgShuffle(SimdConstant * control,bool * swapOperands)539 static SimdShuffleOp AnalyzeTwoArgShuffle(SimdConstant* control,
540                                           bool* swapOperands) {
541   Maybe<SimdShuffleOp> op;
542   op = TryConcatRightShift8x16(control, swapOperands);
543   if (!op) {
544     op = TryBlendInt16x8(control);
545   }
546   if (!op) {
547     op = TryBlendInt8x16(control);
548   }
549   if (!op) {
550     op = TryInterleave64x2(control, swapOperands);
551   }
552   if (!op) {
553     op = TryInterleave32x4(control, swapOperands);
554   }
555   if (!op) {
556     op = TryInterleave16x8(control, swapOperands);
557   }
558   if (!op) {
559     op = TryInterleave8x16(control, swapOperands);
560   }
561   if (!op) {
562     op = Some(SimdShuffleOp::SHUFFLE_BLEND_8x16);
563   }
564   return *op;
565 }
566 
567 // Reorder the operands if that seems useful, notably, move a constant to the
568 // right hand side.  Rewrites the control to account for any move.
MaybeReorderShuffleOperands(MDefinition ** lhs,MDefinition ** rhs,SimdConstant * control)569 static bool MaybeReorderShuffleOperands(MDefinition** lhs, MDefinition** rhs,
570                                         SimdConstant* control) {
571   if ((*lhs)->isWasmFloatConstant()) {
572     MDefinition* tmp = *lhs;
573     *lhs = *rhs;
574     *rhs = tmp;
575 
576     int8_t controlBytes[16];
577     const SimdConstant::I8x16& lanes = control->asInt8x16();
578     for (unsigned i = 0; i < 16; i++) {
579       controlBytes[i] = int8_t(lanes[i] ^ 16);
580     }
581     *control = SimdConstant::CreateX16(controlBytes);
582 
583     return true;
584   }
585   return false;
586 }
587 
#  ifdef DEBUG
// Debug-only: pass a string naming the specialization chosen for `s` to
// js::wasm::ReportSimdAnalysis, then return `s` itself so that the call can
// wrap a return expression.  Crashes if the op is not one expected for the
// operand kind.
static const SimdShuffle& ReportShuffleSpecialization(const SimdShuffle& s) {
  const bool twoOperands = s.opd == SimdShuffle::Operand::BOTH ||
                           s.opd == SimdShuffle::Operand::BOTH_SWAPPED;
  const bool oneOperand = s.opd == SimdShuffle::Operand::LEFT ||
                          s.opd == SimdShuffle::Operand::RIGHT;

  const char* description = nullptr;
  if (twoOperands) {
    switch (*s.shuffleOp) {
      case SimdShuffleOp::SHUFFLE_BLEND_8x16:
        description = "shuffle -> shuffle+blend 8x16";
        break;
      case SimdShuffleOp::BLEND_8x16:
        description = "shuffle -> blend 8x16";
        break;
      case SimdShuffleOp::BLEND_16x8:
        description = "shuffle -> blend 16x8";
        break;
      case SimdShuffleOp::CONCAT_RIGHT_SHIFT_8x16:
        description = "shuffle -> concat+shift-right 8x16";
        break;
      case SimdShuffleOp::INTERLEAVE_HIGH_8x16:
        description = "shuffle -> interleave-high 8x16";
        break;
      case SimdShuffleOp::INTERLEAVE_HIGH_16x8:
        description = "shuffle -> interleave-high 16x8";
        break;
      case SimdShuffleOp::INTERLEAVE_HIGH_32x4:
        description = "shuffle -> interleave-high 32x4";
        break;
      case SimdShuffleOp::INTERLEAVE_HIGH_64x2:
        description = "shuffle -> interleave-high 64x2";
        break;
      case SimdShuffleOp::INTERLEAVE_LOW_8x16:
        description = "shuffle -> interleave-low 8x16";
        break;
      case SimdShuffleOp::INTERLEAVE_LOW_16x8:
        description = "shuffle -> interleave-low 16x8";
        break;
      case SimdShuffleOp::INTERLEAVE_LOW_32x4:
        description = "shuffle -> interleave-low 32x4";
        break;
      case SimdShuffleOp::INTERLEAVE_LOW_64x2:
        description = "shuffle -> interleave-low 64x2";
        break;
      default:
        MOZ_CRASH("Unexpected shuffle op");
    }
  } else if (oneOperand) {
    switch (*s.permuteOp) {
      case SimdPermuteOp::BROADCAST_8x16:
        description = "shuffle -> broadcast 8x16";
        break;
      case SimdPermuteOp::BROADCAST_16x8:
        description = "shuffle -> broadcast 16x8";
        break;
      case SimdPermuteOp::MOVE:
        description = "shuffle -> move";
        break;
      case SimdPermuteOp::REVERSE_16x8:
        description = "shuffle -> reverse bytes in 16-bit lanes";
        break;
      case SimdPermuteOp::REVERSE_32x4:
        description = "shuffle -> reverse bytes in 32-bit lanes";
        break;
      case SimdPermuteOp::REVERSE_64x2:
        description = "shuffle -> reverse bytes in 64-bit lanes";
        break;
      case SimdPermuteOp::PERMUTE_8x16:
        description = "shuffle -> permute 8x16";
        break;
      case SimdPermuteOp::PERMUTE_16x8:
        description = "shuffle -> permute 16x8";
        break;
      case SimdPermuteOp::PERMUTE_32x4:
        description = "shuffle -> permute 32x4";
        break;
      case SimdPermuteOp::ROTATE_RIGHT_8x16:
        description = "shuffle -> rotate-right 8x16";
        break;
      case SimdPermuteOp::SHIFT_LEFT_8x16:
        description = "shuffle -> shift-left 8x16";
        break;
      case SimdPermuteOp::SHIFT_RIGHT_8x16:
        description = "shuffle -> shift-right 8x16";
        break;
      default:
        MOZ_CRASH("Unexpected permute op");
    }
  }

  // Nothing is reported for an operand kind not listed above.
  if (description) {
    js::wasm::ReportSimdAnalysis(description);
  }
  return s;
}
#  endif  // DEBUG
684 
AnalyzeSimdShuffle(SimdConstant control,MDefinition * lhs,MDefinition * rhs)685 SimdShuffle jit::AnalyzeSimdShuffle(SimdConstant control, MDefinition* lhs,
686                                     MDefinition* rhs) {
687 #  ifdef DEBUG
688 #    define R(s) ReportShuffleSpecialization(s)
689 #  else
690 #    define R(s) (s)
691 #  endif
692 
693   // If only one of the inputs is used, determine which.
694   bool useLeft = true;
695   bool useRight = true;
696   if (lhs == rhs) {
697     useRight = false;
698   } else {
699     bool allAbove = true;
700     bool allBelow = true;
701     const SimdConstant::I8x16& lanes = control.asInt8x16();
702     for (int8_t i : lanes) {
703       allAbove = allAbove && i >= 16;
704       allBelow = allBelow && i < 16;
705     }
706     if (allAbove) {
707       useLeft = false;
708     } else if (allBelow) {
709       useRight = false;
710     }
711   }
712 
713   // Deal with one-ignored-input.
714   if (!(useLeft && useRight)) {
715     SimdPermuteOp op = AnalyzePermute(&control);
716     return R(SimdShuffle::permute(
717         useLeft ? SimdShuffle::Operand::LEFT : SimdShuffle::Operand::RIGHT,
718         control, op));
719   }
720 
721   // Move constants to rhs.
722   bool swapOperands = MaybeReorderShuffleOperands(&lhs, &rhs, &control);
723 
724   // Deal with constant rhs.
725   if (rhs->isWasmFloatConstant()) {
726     SimdConstant rhsConstant = rhs->toWasmFloatConstant()->toSimd128();
727     if (rhsConstant.isZeroBits()) {
728       Maybe<SimdPermuteOp> op = AnalyzeShuffleWithZero(&control);
729       if (op) {
730         return R(SimdShuffle::permute(swapOperands ? SimdShuffle::Operand::RIGHT
731                                                    : SimdShuffle::Operand::LEFT,
732                                       control, *op));
733       }
734     }
735   }
736 
737   // Two operands both of which are used.  If there's one constant operand it is
738   // now on the rhs.
739   SimdShuffleOp op = AnalyzeTwoArgShuffle(&control, &swapOperands);
740   return R(SimdShuffle::shuffle(swapOperands
741                                     ? SimdShuffle::Operand::BOTH_SWAPPED
742                                     : SimdShuffle::Operand::BOTH,
743                                 control, op));
744 #  undef R
745 }
746 
747 #endif  // ENABLE_WASM_SIMD
748