/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 * vim: set ts=8 sts=2 et sw=2 tw=80:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "jit/x86/MacroAssembler-x86.h"

#include "mozilla/Alignment.h"
#include "mozilla/Casting.h"

#include "jit/AtomicOp.h"
#include "jit/Bailouts.h"
#include "jit/BaselineFrame.h"
#include "jit/JitFrames.h"
#include "jit/JitRuntime.h"
#include "jit/MacroAssembler.h"
#include "jit/MoveEmitter.h"
#include "util/Memory.h"
#include "vm/JitActivation.h"  // js::jit::JitActivation
#include "vm/JSContext.h"

#include "jit/MacroAssembler-inl.h"
#include "vm/JSScript-inl.h"

using namespace js;
using namespace js::jit;

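// Note on the loadConstant* helpers below: each emits its instruction with a
// null placeholder address and records the current code offset in the
// constant's `uses` list. finish() later emits the pooled constant and binds
// every recorded use to it with a CodeLabel, so the absolute address of the
// constant is patched in when the code is linked.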
void MacroAssemblerX86::loadConstantDouble(double d, FloatRegister dest) {
  if (maybeInlineDouble(d, dest)) {
    return;
  }
  Double* dbl = getDouble(d);
  if (!dbl) {
    return;
  }
  masm.vmovsd_mr(nullptr, dest.encoding());
  propagateOOM(dbl->uses.append(CodeOffset(masm.size())));
}

void MacroAssemblerX86::loadConstantFloat32(float f, FloatRegister dest) {
  if (maybeInlineFloat(f, dest)) {
    return;
  }
  Float* flt = getFloat(f);
  if (!flt) {
    return;
  }
  masm.vmovss_mr(nullptr, dest.encoding());
  propagateOOM(flt->uses.append(CodeOffset(masm.size())));
}

void MacroAssemblerX86::loadConstantSimd128Int(const SimdConstant& v,
                                               FloatRegister dest) {
  if (maybeInlineSimd128Int(v, dest)) {
    return;
  }
  SimdData* i4 = getSimdData(v);
  if (!i4) {
    return;
  }
  masm.vmovdqa_mr(nullptr, dest.encoding());
  propagateOOM(i4->uses.append(CodeOffset(masm.size())));
}

void MacroAssemblerX86::loadConstantSimd128Float(const SimdConstant& v,
                                                 FloatRegister dest) {
  if (maybeInlineSimd128Float(v, dest)) {
    return;
  }
  SimdData* f4 = getSimdData(v);
  if (!f4) {
    return;
  }
  masm.vmovaps_mr(nullptr, dest.encoding());
  propagateOOM(f4->uses.append(CodeOffset(masm.size())));
}

void MacroAssemblerX86::vpPatchOpSimd128(
    const SimdConstant& v, FloatRegister reg,
    void (X86Encoding::BaseAssemblerX86::*op)(
        const void* address, X86Encoding::XMMRegisterID srcId,
        X86Encoding::XMMRegisterID destId)) {
  SimdData* val = getSimdData(v);
  if (!val) {
    return;
  }
  (masm.*op)(nullptr, reg.encoding(), reg.encoding());
  propagateOOM(val->uses.append(CodeOffset(masm.size())));
}

void MacroAssemblerX86::vpPatchOpSimd128(
    const SimdConstant& v, FloatRegister reg,
    size_t (X86Encoding::BaseAssemblerX86::*op)(
        const void* address, X86Encoding::XMMRegisterID srcId,
        X86Encoding::XMMRegisterID destId)) {
  SimdData* val = getSimdData(v);
  if (!val) {
    return;
  }
  size_t patchOffsetFromEnd =
      (masm.*op)(nullptr, reg.encoding(), reg.encoding());
  propagateOOM(val->uses.append(CodeOffset(masm.size() - patchOffsetFromEnd)));
}

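// The helpers below are thin wrappers around vpPatchOpSimd128: the SIMD
// constant becomes the (to-be-patched) memory operand of the named encoding,
// and the register passed in is used for both register operand slots of that
// encoding.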
void MacroAssemblerX86::vpaddbSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddb_mr);
}

void MacroAssemblerX86::vpaddwSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddw_mr);
}

void MacroAssemblerX86::vpadddSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddd_mr);
}

void MacroAssemblerX86::vpaddqSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddq_mr);
}

void MacroAssemblerX86::vpsubbSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubb_mr);
}

void MacroAssemblerX86::vpsubwSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubw_mr);
}

void MacroAssemblerX86::vpsubdSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubd_mr);
}

void MacroAssemblerX86::vpsubqSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubq_mr);
}

void MacroAssemblerX86::vpmullwSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmullw_mr);
}

void MacroAssemblerX86::vpmulldSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmulld_mr);
}

void MacroAssemblerX86::vpaddsbSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddsb_mr);
}

void MacroAssemblerX86::vpaddusbSimd128(const SimdConstant& v,
                                        FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddusb_mr);
}

void MacroAssemblerX86::vpaddswSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddsw_mr);
}

void MacroAssemblerX86::vpadduswSimd128(const SimdConstant& v,
                                        FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpaddusw_mr);
}

void MacroAssemblerX86::vpsubsbSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubsb_mr);
}

void MacroAssemblerX86::vpsubusbSimd128(const SimdConstant& v,
                                        FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubusb_mr);
}

void MacroAssemblerX86::vpsubswSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubsw_mr);
}

void MacroAssemblerX86::vpsubuswSimd128(const SimdConstant& v,
                                        FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpsubusw_mr);
}

void MacroAssemblerX86::vpminsbSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpminsb_mr);
}

void MacroAssemblerX86::vpminubSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpminub_mr);
}

void MacroAssemblerX86::vpminswSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpminsw_mr);
}

void MacroAssemblerX86::vpminuwSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpminuw_mr);
}

void MacroAssemblerX86::vpminsdSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpminsd_mr);
}

void MacroAssemblerX86::vpminudSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpminud_mr);
}

void MacroAssemblerX86::vpmaxsbSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmaxsb_mr);
}

void MacroAssemblerX86::vpmaxubSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmaxub_mr);
}

void MacroAssemblerX86::vpmaxswSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmaxsw_mr);
}

void MacroAssemblerX86::vpmaxuwSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmaxuw_mr);
}

void MacroAssemblerX86::vpmaxsdSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmaxsd_mr);
}

void MacroAssemblerX86::vpmaxudSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmaxud_mr);
}

void MacroAssemblerX86::vpandSimd128(const SimdConstant& v,
                                     FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpand_mr);
}

void MacroAssemblerX86::vpxorSimd128(const SimdConstant& v,
                                     FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpxor_mr);
}

void MacroAssemblerX86::vporSimd128(const SimdConstant& v,
                                    FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpor_mr);
}

void MacroAssemblerX86::vaddpsSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vaddps_mr);
}

void MacroAssemblerX86::vaddpdSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vaddpd_mr);
}

void MacroAssemblerX86::vsubpsSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vsubps_mr);
}

void MacroAssemblerX86::vsubpdSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vsubpd_mr);
}

void MacroAssemblerX86::vdivpsSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vdivps_mr);
}

void MacroAssemblerX86::vdivpdSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vdivpd_mr);
}

void MacroAssemblerX86::vmulpsSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vmulps_mr);
}

void MacroAssemblerX86::vmulpdSimd128(const SimdConstant& v,
                                      FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vmulpd_mr);
}

void MacroAssemblerX86::vpacksswbSimd128(const SimdConstant& v,
                                         FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpacksswb_mr);
}

void MacroAssemblerX86::vpackuswbSimd128(const SimdConstant& v,
                                         FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpackuswb_mr);
}

void MacroAssemblerX86::vpackssdwSimd128(const SimdConstant& v,
                                         FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpackssdw_mr);
}

void MacroAssemblerX86::vpackusdwSimd128(const SimdConstant& v,
                                         FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpackusdw_mr);
}

void MacroAssemblerX86::vpshufbSimd128(const SimdConstant& v,
                                       FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpshufb_mr);
}

void MacroAssemblerX86::vptestSimd128(const SimdConstant& v,
                                      FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vptest_mr);
}

void MacroAssemblerX86::vpmaddwdSimd128(const SimdConstant& v,
                                        FloatRegister srcDest) {
  vpPatchOpSimd128(v, srcDest, &X86Encoding::BaseAssemblerX86::vpmaddwd_mr);
}

void MacroAssemblerX86::vpcmpeqbSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vpcmpeqb_mr);
}

void MacroAssemblerX86::vpcmpgtbSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vpcmpgtb_mr);
}

void MacroAssemblerX86::vpcmpeqwSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vpcmpeqw_mr);
}

void MacroAssemblerX86::vpcmpgtwSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vpcmpgtw_mr);
}

void MacroAssemblerX86::vpcmpeqdSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vpcmpeqd_mr);
}

void MacroAssemblerX86::vpcmpgtdSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vpcmpgtd_mr);
}

void MacroAssemblerX86::vcmpeqpsSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmpeqps_mr);
}

void MacroAssemblerX86::vcmpneqpsSimd128(const SimdConstant& v,
                                         FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmpneqps_mr);
}

void MacroAssemblerX86::vcmpltpsSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmpltps_mr);
}

void MacroAssemblerX86::vcmplepsSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmpleps_mr);
}

void MacroAssemblerX86::vcmpeqpdSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmpeqpd_mr);
}

void MacroAssemblerX86::vcmpneqpdSimd128(const SimdConstant& v,
                                         FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmpneqpd_mr);
}

void MacroAssemblerX86::vcmpltpdSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmpltpd_mr);
}

void MacroAssemblerX86::vcmplepdSimd128(const SimdConstant& v,
                                        FloatRegister src) {
  vpPatchOpSimd128(v, src, &X86Encoding::BaseAssemblerX86::vcmplepd_mr);
}

void MacroAssemblerX86::finish() {
  // Last instruction may be an indirect jump so eagerly insert an undefined
  // instruction byte to prevent processors from decoding data values into
  // their pipelines. See Intel performance guides.
  masm.ud2();

  if (!doubles_.empty()) {
    masm.haltingAlign(sizeof(double));
  }
  for (const Double& d : doubles_) {
    CodeOffset cst(masm.currentOffset());
    for (CodeOffset use : d.uses) {
      addCodeLabel(CodeLabel(use, cst));
    }
    masm.doubleConstant(d.value);
    if (!enoughMemory_) {
      return;
    }
  }

  if (!floats_.empty()) {
    masm.haltingAlign(sizeof(float));
  }
  for (const Float& f : floats_) {
    CodeOffset cst(masm.currentOffset());
    for (CodeOffset use : f.uses) {
      addCodeLabel(CodeLabel(use, cst));
    }
    masm.floatConstant(f.value);
    if (!enoughMemory_) {
      return;
    }
  }

  // SIMD memory values must be suitably aligned.
  if (!simds_.empty()) {
    masm.haltingAlign(SimdMemoryAlignment);
  }
  for (const SimdData& v : simds_) {
    CodeOffset cst(masm.currentOffset());
    for (CodeOffset use : v.uses) {
      addCodeLabel(CodeLabel(use, cst));
    }
    masm.simd128Constant(v.value.bytes());
    if (!enoughMemory_) {
      return;
    }
  }
}

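// Shared tail for exception handling: reserve a ResumeFromException record on
// the stack, call HandleException on it, then dispatch on the resulting kind
// to the matching resume path (entry frame, catch, finally, forced return,
// bailout, or wasm).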
void MacroAssemblerX86::handleFailureWithHandlerTail(Label* profilerExitTail) {
  // Reserve space for exception information.
  subl(Imm32(sizeof(ResumeFromException)), esp);
  movl(esp, eax);

  // Call the handler.
  using Fn = void (*)(ResumeFromException* rfe);
  asMasm().setupUnalignedABICall(ecx);
  asMasm().passABIArg(eax);
  asMasm().callWithABI<Fn, HandleException>(
      MoveOp::GENERAL, CheckUnsafeCallWithABI::DontCheckHasExitFrame);

  Label entryFrame;
  Label catch_;
  Label finally;
  Label return_;
  Label bailout;
  Label wasm;
  Label wasmCatch;

  loadPtr(Address(esp, offsetof(ResumeFromException, kind)), eax);
  asMasm().branch32(Assembler::Equal, eax,
                    Imm32(ResumeFromException::RESUME_ENTRY_FRAME),
                    &entryFrame);
  asMasm().branch32(Assembler::Equal, eax,
                    Imm32(ResumeFromException::RESUME_CATCH), &catch_);
  asMasm().branch32(Assembler::Equal, eax,
                    Imm32(ResumeFromException::RESUME_FINALLY), &finally);
  asMasm().branch32(Assembler::Equal, eax,
                    Imm32(ResumeFromException::RESUME_FORCED_RETURN), &return_);
  asMasm().branch32(Assembler::Equal, eax,
                    Imm32(ResumeFromException::RESUME_BAILOUT), &bailout);
  asMasm().branch32(Assembler::Equal, eax,
                    Imm32(ResumeFromException::RESUME_WASM), &wasm);
  asMasm().branch32(Assembler::Equal, eax,
                    Imm32(ResumeFromException::RESUME_WASM_CATCH), &wasmCatch);

  breakpoint();  // Invalid kind.

  // No exception handler. Load the error value, load the new stack pointer
  // and return from the entry frame.
  bind(&entryFrame);
  asMasm().moveValue(MagicValue(JS_ION_ERROR), JSReturnOperand);
  loadPtr(Address(esp, offsetof(ResumeFromException, stackPointer)), esp);
  ret();

  // If we found a catch handler, this must be a baseline frame. Restore state
  // and jump to the catch block.
  bind(&catch_);
  loadPtr(Address(esp, offsetof(ResumeFromException, target)), eax);
  loadPtr(Address(esp, offsetof(ResumeFromException, framePointer)), ebp);
  loadPtr(Address(esp, offsetof(ResumeFromException, stackPointer)), esp);
  jmp(Operand(eax));

  // If we found a finally block, this must be a baseline frame. Push
  // two values expected by JSOp::Retsub: BooleanValue(true) and the
  // exception.
  bind(&finally);
  ValueOperand exception = ValueOperand(ecx, edx);
  loadValue(Address(esp, offsetof(ResumeFromException, exception)), exception);

  loadPtr(Address(esp, offsetof(ResumeFromException, target)), eax);
  loadPtr(Address(esp, offsetof(ResumeFromException, framePointer)), ebp);
  loadPtr(Address(esp, offsetof(ResumeFromException, stackPointer)), esp);

  pushValue(BooleanValue(true));
  pushValue(exception);
  jmp(Operand(eax));

  // Only used in debug mode. Return BaselineFrame->returnValue() to the caller.
  bind(&return_);
  loadPtr(Address(esp, offsetof(ResumeFromException, framePointer)), ebp);
  loadPtr(Address(esp, offsetof(ResumeFromException, stackPointer)), esp);
  loadValue(Address(ebp, BaselineFrame::reverseOffsetOfReturnValue()),
            JSReturnOperand);
  movl(ebp, esp);
  pop(ebp);

  // If profiling is enabled, then update the lastProfilingFrame to refer to
  // caller frame before returning.
  {
    Label skipProfilingInstrumentation;
    // Test if profiler enabled.
    AbsoluteAddress addressOfEnabled(
        GetJitContext()->runtime->geckoProfiler().addressOfEnabled());
    asMasm().branch32(Assembler::Equal, addressOfEnabled, Imm32(0),
                      &skipProfilingInstrumentation);
    jump(profilerExitTail);
    bind(&skipProfilingInstrumentation);
  }

  ret();

  // If we are bailing out to baseline to handle an exception, jump to the
  // bailout tail stub. Load 1 (true) in ReturnReg to indicate success.
  bind(&bailout);
  loadPtr(Address(esp, offsetof(ResumeFromException, bailoutInfo)), ecx);
  move32(Imm32(1), ReturnReg);
  jmp(Operand(esp, offsetof(ResumeFromException, target)));

  // If we are throwing and the innermost frame was a wasm frame, reset SP and
  // FP; SP is pointing to the unwound return address to the wasm entry, so
  // we can just ret().
  bind(&wasm);
  loadPtr(Address(esp, offsetof(ResumeFromException, framePointer)), ebp);
  loadPtr(Address(esp, offsetof(ResumeFromException, stackPointer)), esp);
  masm.ret();

  // Found a wasm catch handler, restore state and jump to it.
  bind(&wasmCatch);
  loadPtr(Address(esp, offsetof(ResumeFromException, target)), eax);
  loadPtr(Address(esp, offsetof(ResumeFromException, framePointer)), ebp);
  loadPtr(Address(esp, offsetof(ResumeFromException, stackPointer)), esp);
  jmp(Operand(eax));
}

void MacroAssemblerX86::profilerEnterFrame(Register framePtr,
                                           Register scratch) {
  asMasm().loadJSContext(scratch);
  loadPtr(Address(scratch, offsetof(JSContext, profilingActivation_)), scratch);
  storePtr(framePtr,
           Address(scratch, JitActivation::offsetOfLastProfilingFrame()));
  storePtr(ImmPtr(nullptr),
           Address(scratch, JitActivation::offsetOfLastProfilingCallSite()));
}

void MacroAssemblerX86::profilerExitFrame() {
  jump(GetJitContext()->runtime->jitRuntime()->getProfilerExitFrameTail());
}

MacroAssembler& MacroAssemblerX86::asMasm() {
  return *static_cast<MacroAssembler*>(this);
}

const MacroAssembler& MacroAssemblerX86::asMasm() const {
  return *static_cast<const MacroAssembler*>(this);
}

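// Worked example for the stack probing below: a 40 KiB reservation spans ten
// 4 KiB pages, which exceeds the eight-page unrolled limit, so it takes the
// loop path: eax is saved, each loop iteration drops esp by a page and touches
// it, and eax is reloaded from its original slot at the end.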
void MacroAssembler::subFromStackPtr(Imm32 imm32) {
  if (imm32.value) {
    // On windows, we cannot skip very far down the stack without touching the
    // memory pages in-between.  This is a corner-case code for situations where
    // the Ion frame data for a piece of code is very large.  To handle this
    // special case, for frames over 4k in size we allocate memory on the stack
    // incrementally, touching it as we go.
    //
    // When the amount is quite large, which it can be, we emit an actual loop,
    // in order to keep the function prologue compact.  Compactness is a
    // requirement for eg Wasm's CodeRange data structure, which can encode only
    // 8-bit offsets.
    uint32_t amountLeft = imm32.value;
    uint32_t fullPages = amountLeft / 4096;
    if (fullPages <= 8) {
      while (amountLeft > 4096) {
        subl(Imm32(4096), StackPointer);
        store32(Imm32(0), Address(StackPointer, 0));
        amountLeft -= 4096;
      }
      subl(Imm32(amountLeft), StackPointer);
    } else {
      // Save scratch register.
      push(eax);
      amountLeft -= 4;
      fullPages = amountLeft / 4096;

      Label top;
      move32(Imm32(fullPages), eax);
      bind(&top);
      subl(Imm32(4096), StackPointer);
      store32(Imm32(0), Address(StackPointer, 0));
      subl(Imm32(1), eax);
      j(Assembler::NonZero, &top);
      amountLeft -= fullPages * 4096;
      if (amountLeft) {
        subl(Imm32(amountLeft), StackPointer);
      }

      // Restore scratch register.
      movl(Operand(StackPointer, uint32_t(imm32.value) - 4), eax);
    }
  }
}

//{{{ check_macroassembler_style
// ===============================================================
// ABI function calls.

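// A typical unaligned ABI call from this file (see handleFailureWithHandlerTail
// above) looks roughly like:
//   masm.setupUnalignedABICall(scratch);
//   masm.passABIArg(arg);
//   masm.callWithABI<Fn, Callee>();
// setupUnalignedABICall saves the old esp so callWithABIPost can restore it.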
void MacroAssembler::setupUnalignedABICall(Register scratch) {
  setupNativeABICall();
  dynamicAlignment_ = true;

  movl(esp, scratch);
  andl(Imm32(~(ABIStackAlignment - 1)), esp);
  push(scratch);
}

void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) {
  MOZ_ASSERT(inCall_);
  uint32_t stackForCall = abiArgs_.stackBytesConsumedSoFar();

  if (dynamicAlignment_) {
    // sizeof(intptr_t) accounts for the saved stack pointer pushed by
    // setupUnalignedABICall.
    stackForCall += ComputeByteAlignment(stackForCall + sizeof(intptr_t),
                                         ABIStackAlignment);
  } else {
    uint32_t alignmentAtPrologue = callFromWasm ? sizeof(wasm::Frame) : 0;
    stackForCall += ComputeByteAlignment(
        stackForCall + framePushed() + alignmentAtPrologue, ABIStackAlignment);
  }

  *stackAdjust = stackForCall;
  reserveStack(stackForCall);

  // Position all arguments.
  {
    enoughMemory_ &= moveResolver_.resolve();
    if (!enoughMemory_) {
      return;
    }

    MoveEmitter emitter(*this);
    emitter.emit(moveResolver_);
    emitter.finish();
  }

  assertStackAlignment(ABIStackAlignment);
}

void MacroAssembler::callWithABIPost(uint32_t stackAdjust, MoveOp::Type result,
                                     bool callFromWasm) {
  freeStack(stackAdjust);

  // Calls to native functions in wasm pass through a thunk which already
  // fixes up the return value for us.
  if (!callFromWasm) {
    if (result == MoveOp::DOUBLE) {
      reserveStack(sizeof(double));
      fstp(Operand(esp, 0));
      loadDouble(Operand(esp, 0), ReturnDoubleReg);
      freeStack(sizeof(double));
    } else if (result == MoveOp::FLOAT32) {
      reserveStack(sizeof(float));
      fstp32(Operand(esp, 0));
      loadFloat32(Operand(esp, 0), ReturnFloat32Reg);
      freeStack(sizeof(float));
    }
  }

  if (dynamicAlignment_) {
    pop(esp);
  }

#ifdef DEBUG
  MOZ_ASSERT(inCall_);
  inCall_ = false;
#endif
}

void MacroAssembler::callWithABINoProfiler(Register fun, MoveOp::Type result) {
  uint32_t stackAdjust;
  callWithABIPre(&stackAdjust);
  call(fun);
  callWithABIPost(stackAdjust, result);
}

void MacroAssembler::callWithABINoProfiler(const Address& fun,
                                           MoveOp::Type result) {
  uint32_t stackAdjust;
  callWithABIPre(&stackAdjust);
  call(fun);
  callWithABIPost(stackAdjust, result);
}

// ===============================================================
// Move instructions

void MacroAssembler::moveValue(const TypedOrValueRegister& src,
                               const ValueOperand& dest) {
  if (src.hasValue()) {
    moveValue(src.valueReg(), dest);
    return;
  }

  MIRType type = src.type();
  AnyRegister reg = src.typedReg();

  if (!IsFloatingPointType(type)) {
    if (reg.gpr() != dest.payloadReg()) {
      movl(reg.gpr(), dest.payloadReg());
    }
    mov(ImmWord(MIRTypeToTag(type)), dest.typeReg());
    return;
  }

  ScratchDoubleScope scratch(*this);
  FloatRegister freg = reg.fpu();
  if (type == MIRType::Float32) {
    convertFloat32ToDouble(freg, scratch);
    freg = scratch;
  }
  boxDouble(freg, dest, scratch);
}

void MacroAssembler::moveValue(const ValueOperand& src,
                               const ValueOperand& dest) {
  Register s0 = src.typeReg();
  Register s1 = src.payloadReg();
  Register d0 = dest.typeReg();
  Register d1 = dest.payloadReg();

  // Either one or both of the source registers could be the same as a
  // destination register.
  if (s1 == d0) {
    if (s0 == d1) {
      // If both are, this is just a swap of two registers.
      xchgl(d0, d1);
      return;
    }
    // If only one is, copy that source first.
    std::swap(s0, s1);
    std::swap(d0, d1);
  }

  if (s0 != d0) {
    movl(s0, d0);
  }
  if (s1 != d1) {
    movl(s1, d1);
  }
}

void MacroAssembler::moveValue(const Value& src, const ValueOperand& dest) {
  movl(Imm32(src.toNunboxTag()), dest.typeReg());
  if (src.isGCThing()) {
    movl(ImmGCPtr(src.toGCThing()), dest.payloadReg());
  } else {
    movl(Imm32(src.toNunboxPayload()), dest.payloadReg());
  }
}

// ===============================================================
// Branch functions

void MacroAssembler::loadStoreBuffer(Register ptr, Register buffer) {
  if (ptr != buffer) {
    movePtr(ptr, buffer);
  }
  orPtr(Imm32(gc::ChunkMask), buffer);
  loadPtr(Address(buffer, gc::ChunkStoreBufferOffsetFromLastByte), buffer);
}

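// Nursery check trick used below: or-ing gc::ChunkMask into a pointer yields
// the address of the last byte of that pointer's chunk, and the chunk's store
// buffer pointer lives at a fixed offset from that byte. The store buffer
// pointer is expected to be non-null only for nursery chunks, so comparing it
// against zero tells us whether the cell is in the nursery.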
void MacroAssembler::branchPtrInNurseryChunk(Condition cond, Register ptr,
                                             Register temp, Label* label) {
  MOZ_ASSERT(temp != InvalidReg);  // A temp register is required for x86.
  MOZ_ASSERT(ptr != temp);
  movePtr(ptr, temp);
  branchPtrInNurseryChunkImpl(cond, temp, label);
}

void MacroAssembler::branchPtrInNurseryChunk(Condition cond,
                                             const Address& address,
                                             Register temp, Label* label) {
  MOZ_ASSERT(temp != InvalidReg);  // A temp register is required for x86.
  loadPtr(address, temp);
  branchPtrInNurseryChunkImpl(cond, temp, label);
}

void MacroAssembler::branchPtrInNurseryChunkImpl(Condition cond, Register ptr,
                                                 Label* label) {
  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);

  orPtr(Imm32(gc::ChunkMask), ptr);
  branchPtr(InvertCondition(cond),
            Address(ptr, gc::ChunkStoreBufferOffsetFromLastByte), ImmWord(0),
            label);
}

void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                              const Address& address,
                                              Register temp, Label* label) {
  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);

  Label done;

  branchTestGCThing(Assembler::NotEqual, address,
                    cond == Assembler::Equal ? &done : label);
  branchPtrInNurseryChunk(cond, ToPayload(address), temp, label);

  bind(&done);
}

void MacroAssembler::branchValueIsNurseryCell(Condition cond,
                                              ValueOperand value, Register temp,
                                              Label* label) {
  MOZ_ASSERT(cond == Assembler::Equal || cond == Assembler::NotEqual);

  Label done;

  branchTestGCThing(Assembler::NotEqual, value,
                    cond == Assembler::Equal ? &done : label);
  branchPtrInNurseryChunk(cond, value.payloadReg(), temp, label);

  bind(&done);
}

void MacroAssembler::branchTestValue(Condition cond, const ValueOperand& lhs,
                                     const Value& rhs, Label* label) {
  MOZ_ASSERT(cond == Equal || cond == NotEqual);
  if (rhs.isGCThing()) {
    cmpPtr(lhs.payloadReg(), ImmGCPtr(rhs.toGCThing()));
  } else {
    cmpPtr(lhs.payloadReg(), ImmWord(rhs.toNunboxPayload()));
  }

  if (cond == Equal) {
    Label done;
    j(NotEqual, &done);
    {
      cmp32(lhs.typeReg(), Imm32(rhs.toNunboxTag()));
      j(Equal, label);
    }
    bind(&done);
  } else {
    j(NotEqual, label);

    cmp32(lhs.typeReg(), Imm32(rhs.toNunboxTag()));
    j(NotEqual, label);
  }
}

// ========================================================================
// Memory access primitives.
template <typename T>
void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
                                       MIRType valueType, const T& dest,
                                       MIRType slotType) {
  if (valueType == MIRType::Double) {
    storeDouble(value.reg().typedReg().fpu(), dest);
    return;
  }

  // Store the type tag if needed.
  if (valueType != slotType) {
    storeTypeTag(ImmType(ValueTypeFromMIRType(valueType)), Operand(dest));
  }

  // Store the payload.
  if (value.constant()) {
    storePayload(value.value(), Operand(dest));
  } else {
    storePayload(value.reg().typedReg().gpr(), Operand(dest));
  }
}

template void MacroAssembler::storeUnboxedValue(const ConstantOrRegister& value,
                                                MIRType valueType,
                                                const Address& dest,
                                                MIRType slotType);
template void MacroAssembler::storeUnboxedValue(
    const ConstantOrRegister& value, MIRType valueType,
    const BaseObjectElementIndex& dest, MIRType slotType);

// wasm specific methods, used in both the wasm baseline compiler and ion.

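// Note: each wasm access below is registered with append(access, size()) right
// before the memory instruction is emitted, so the recorded code offset can be
// mapped back to the access (e.g. for out-of-bounds trap handling).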
void MacroAssembler::wasmLoad(const wasm::MemoryAccessDesc& access,
                              Operand srcAddr, AnyRegister out) {
  MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP ||
             srcAddr.kind() == Operand::MEM_SCALE);

  MOZ_ASSERT_IF(
      access.isZeroExtendSimd128Load(),
      access.type() == Scalar::Float32 || access.type() == Scalar::Float64);
  MOZ_ASSERT_IF(access.isSplatSimd128Load(), access.type() == Scalar::Float64);
  MOZ_ASSERT_IF(access.isWidenSimd128Load(), access.type() == Scalar::Float64);

  memoryBarrierBefore(access.sync());

  append(access, size());
  switch (access.type()) {
    case Scalar::Int8:
      movsbl(srcAddr, out.gpr());
      break;
    case Scalar::Uint8:
      movzbl(srcAddr, out.gpr());
      break;
    case Scalar::Int16:
      movswl(srcAddr, out.gpr());
      break;
    case Scalar::Uint16:
      movzwl(srcAddr, out.gpr());
      break;
    case Scalar::Int32:
    case Scalar::Uint32:
      movl(srcAddr, out.gpr());
      break;
    case Scalar::Float32:
      // vmovss does the right thing also for access.isZeroExtendSimd128Load()
      vmovss(srcAddr, out.fpu());
      break;
    case Scalar::Float64:
      if (access.isSplatSimd128Load()) {
        vmovddup(srcAddr, out.fpu());
      } else if (access.isWidenSimd128Load()) {
        switch (access.widenSimdOp()) {
          case wasm::SimdOp::I16x8LoadS8x8:
            vpmovsxbw(srcAddr, out.fpu());
            break;
          case wasm::SimdOp::I16x8LoadU8x8:
            vpmovzxbw(srcAddr, out.fpu());
            break;
          case wasm::SimdOp::I32x4LoadS16x4:
            vpmovsxwd(srcAddr, out.fpu());
            break;
          case wasm::SimdOp::I32x4LoadU16x4:
            vpmovzxwd(srcAddr, out.fpu());
            break;
          case wasm::SimdOp::I64x2LoadS32x2:
            vpmovsxdq(srcAddr, out.fpu());
            break;
          case wasm::SimdOp::I64x2LoadU32x2:
            vpmovzxdq(srcAddr, out.fpu());
            break;
          default:
            MOZ_CRASH("Unexpected widening op for wasmLoad");
        }
      } else {
        // vmovsd does the right thing also for access.isZeroExtendSimd128Load()
        vmovsd(srcAddr, out.fpu());
      }
      break;
    case Scalar::Simd128:
      vmovups(srcAddr, out.fpu());
      break;
    case Scalar::Int64:
    case Scalar::Uint8Clamped:
    case Scalar::BigInt64:
    case Scalar::BigUint64:
    case Scalar::MaxTypedArrayViewType:
      MOZ_CRASH("unexpected type");
  }

  memoryBarrierAfter(access.sync());
}

void MacroAssembler::wasmLoadI64(const wasm::MemoryAccessDesc& access,
                                 Operand srcAddr, Register64 out) {
  // Atomic i64 load must use lock_cmpxchg8b.
  MOZ_ASSERT_IF(access.isAtomic(), access.byteSize() <= 4);
  MOZ_ASSERT(srcAddr.kind() == Operand::MEM_REG_DISP ||
             srcAddr.kind() == Operand::MEM_SCALE);
  MOZ_ASSERT(!access.isZeroExtendSimd128Load());  // Use wasmLoad()
  MOZ_ASSERT(!access.isSplatSimd128Load());       // Use wasmLoad()
  MOZ_ASSERT(!access.isWidenSimd128Load());       // Use wasmLoad()

  memoryBarrierBefore(access.sync());

  append(access, size());
  switch (access.type()) {
    case Scalar::Int8:
      MOZ_ASSERT(out == Register64(edx, eax));
      movsbl(srcAddr, out.low);

      cdq();
      break;
    case Scalar::Uint8:
      movzbl(srcAddr, out.low);

      xorl(out.high, out.high);
      break;
    case Scalar::Int16:
      MOZ_ASSERT(out == Register64(edx, eax));
      movswl(srcAddr, out.low);

      cdq();
      break;
    case Scalar::Uint16:
      movzwl(srcAddr, out.low);

      xorl(out.high, out.high);
      break;
    case Scalar::Int32:
      MOZ_ASSERT(out == Register64(edx, eax));
      movl(srcAddr, out.low);

      cdq();
      break;
    case Scalar::Uint32:
      movl(srcAddr, out.low);

      xorl(out.high, out.high);
      break;
    case Scalar::Int64: {
      if (srcAddr.kind() == Operand::MEM_SCALE) {
        MOZ_RELEASE_ASSERT(srcAddr.toBaseIndex().base != out.low &&
                           srcAddr.toBaseIndex().index != out.low);
      }
      if (srcAddr.kind() == Operand::MEM_REG_DISP) {
        MOZ_RELEASE_ASSERT(srcAddr.toAddress().base != out.low);
      }

      movl(LowWord(srcAddr), out.low);

      append(access, size());
      movl(HighWord(srcAddr), out.high);

      break;
    }
    case Scalar::Float32:
    case Scalar::Float64:
      MOZ_CRASH("non-int64 loads should use load()");
    case Scalar::Simd128:
    case Scalar::Uint8Clamped:
    case Scalar::BigInt64:
    case Scalar::BigUint64:
    case Scalar::MaxTypedArrayViewType:
      MOZ_CRASH("unexpected array type");
  }

  memoryBarrierAfter(access.sync());
}

void MacroAssembler::wasmStore(const wasm::MemoryAccessDesc& access,
                               AnyRegister value, Operand dstAddr) {
  MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP ||
             dstAddr.kind() == Operand::MEM_SCALE);

  memoryBarrierBefore(access.sync());

  append(access, size());
  switch (access.type()) {
    case Scalar::Int8:
    case Scalar::Uint8Clamped:
    case Scalar::Uint8:
      movb(value.gpr(), dstAddr);
      break;
    case Scalar::Int16:
    case Scalar::Uint16:
      movw(value.gpr(), dstAddr);
      break;
    case Scalar::Int32:
    case Scalar::Uint32:
      movl(value.gpr(), dstAddr);
      break;
    case Scalar::Float32:
      vmovss(value.fpu(), dstAddr);
      break;
    case Scalar::Float64:
      vmovsd(value.fpu(), dstAddr);
      break;
    case Scalar::Simd128:
      vmovups(value.fpu(), dstAddr);
      break;
    case Scalar::Int64:
      MOZ_CRASH("Should be handled in storeI64.");
    case Scalar::MaxTypedArrayViewType:
    case Scalar::BigInt64:
    case Scalar::BigUint64:
      MOZ_CRASH("unexpected type");
  }

  memoryBarrierAfter(access.sync());
}

void MacroAssembler::wasmStoreI64(const wasm::MemoryAccessDesc& access,
                                  Register64 value, Operand dstAddr) {
  // Atomic i64 store must use lock_cmpxchg8b.
  MOZ_ASSERT(!access.isAtomic());
  MOZ_ASSERT(dstAddr.kind() == Operand::MEM_REG_DISP ||
             dstAddr.kind() == Operand::MEM_SCALE);

  // Store the high word first so as to hit guard-page-based OOB checks without
  // writing partial data.
  append(access, size());
  movl(value.high, HighWord(dstAddr));

  append(access, size());
  movl(value.low, LowWord(dstAddr));
}

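// On x86-32 there is no plain 64-bit atomic load, so the helper below uses
// lock cmpxchg8b: with ecx:ebx set equal to edx:eax, the instruction leaves
// memory unchanged whether or not the compare succeeds, while edx:eax ends up
// holding the current 64-bit value, loaded atomically.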
template <typename T>
static void AtomicLoad64(MacroAssembler& masm,
                         const wasm::MemoryAccessDesc* access, const T& address,
                         Register64 temp, Register64 output) {
  MOZ_ASSERT(temp.low == ebx);
  MOZ_ASSERT(temp.high == ecx);
  MOZ_ASSERT(output.high == edx);
  MOZ_ASSERT(output.low == eax);

  // In the event edx:eax matches what's in memory, ecx:ebx will be
  // stored.  The two pairs must therefore have the same values.
  masm.movl(edx, ecx);
  masm.movl(eax, ebx);

  if (access) {
    masm.append(*access, masm.size());
  }
  masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(address));
}

void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
                                      const Address& mem, Register64 temp,
                                      Register64 output) {
  AtomicLoad64(*this, &access, mem, temp, output);
}

void MacroAssembler::wasmAtomicLoad64(const wasm::MemoryAccessDesc& access,
                                      const BaseIndex& mem, Register64 temp,
                                      Register64 output) {
  AtomicLoad64(*this, &access, mem, temp, output);
}

template <typename T>
static void CompareExchange64(MacroAssembler& masm,
                              const wasm::MemoryAccessDesc* access,
                              const T& mem, Register64 expected,
                              Register64 replacement, Register64 output) {
  MOZ_ASSERT(expected == output);
  MOZ_ASSERT(expected.high == edx);
  MOZ_ASSERT(expected.low == eax);
  MOZ_ASSERT(replacement.high == ecx);
  MOZ_ASSERT(replacement.low == ebx);

  if (access) {
    masm.append(*access, masm.size());
  }
  masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem));
}

void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
                                           const Address& mem,
                                           Register64 expected,
                                           Register64 replacement,
                                           Register64 output) {
  CompareExchange64(*this, &access, mem, expected, replacement, output);
}

void MacroAssembler::wasmCompareExchange64(const wasm::MemoryAccessDesc& access,
                                           const BaseIndex& mem,
                                           Register64 expected,
                                           Register64 replacement,
                                           Register64 output) {
  CompareExchange64(*this, &access, mem, expected, replacement, output);
}

template <typename T>
static void AtomicExchange64(MacroAssembler& masm,
                             const wasm::MemoryAccessDesc* access, const T& mem,
                             Register64 value, Register64 output) {
  MOZ_ASSERT(value.low == ebx);
  MOZ_ASSERT(value.high == ecx);
  MOZ_ASSERT(output.high == edx);
  MOZ_ASSERT(output.low == eax);

  // edx:eax has garbage initially, and that is the best we can do unless
  // we can guess with high probability what's in memory.

  MOZ_ASSERT(mem.base != edx && mem.base != eax);
  if constexpr (std::is_same_v<T, BaseIndex>) {
    MOZ_ASSERT(mem.index != edx && mem.index != eax);
  } else {
    static_assert(std::is_same_v<T, Address>);
  }

  Label again;
  masm.bind(&again);
  if (access) {
    masm.append(*access, masm.size());
  }
  masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem));
  masm.j(MacroAssembler::NonZero, &again);
}

void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
                                          const Address& mem, Register64 value,
                                          Register64 output) {
  AtomicExchange64(*this, &access, mem, value, output);
}

void MacroAssembler::wasmAtomicExchange64(const wasm::MemoryAccessDesc& access,
                                          const BaseIndex& mem,
                                          Register64 value, Register64 output) {
  AtomicExchange64(*this, &access, mem, value, output);
}

1251 template <typename T>
AtomicFetchOp64(MacroAssembler & masm,const wasm::MemoryAccessDesc * access,AtomicOp op,const Address & value,const T & mem,Register64 temp,Register64 output)1252 static void AtomicFetchOp64(MacroAssembler& masm,
1253                             const wasm::MemoryAccessDesc* access, AtomicOp op,
1254                             const Address& value, const T& mem, Register64 temp,
1255                             Register64 output) {
1256   // We don't have enough registers for all the operands on x86, so the rhs
1257   // operand is in memory.
1258 
1259 #define ATOMIC_OP_BODY(OPERATE)                            \
1260   do {                                                     \
1261     MOZ_ASSERT(output.low == eax);                         \
1262     MOZ_ASSERT(output.high == edx);                        \
1263     MOZ_ASSERT(temp.low == ebx);                           \
1264     MOZ_ASSERT(temp.high == ecx);                          \
1265     if (access) {                                          \
1266       masm.append(*access, masm.size());                   \
1267     }                                                      \
1268     masm.load64(mem, output);                              \
1269     Label again;                                           \
1270     masm.bind(&again);                                     \
1271     masm.move64(output, temp);                             \
1272     masm.OPERATE(Operand(value), temp);                    \
1273     masm.lock_cmpxchg8b(edx, eax, ecx, ebx, Operand(mem)); \
1274     masm.j(MacroAssembler::NonZero, &again);               \
1275   } while (0)
1276 
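  // The macro above expands to a classic compare-and-swap retry loop: load the
  // current 64-bit value into edx:eax, compute OPERATE with the memory-resident
  // rhs into ecx:ebx, and attempt LOCK CMPXCHG8B. On failure the instruction
  // refreshes edx:eax with the value now in memory, so the loop simply retries
  // with the updated operand. The pre-update value left in edx:eax is the
  // fetch-op result.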
  switch (op) {
    case AtomicFetchAddOp:
      ATOMIC_OP_BODY(add64FromMemory);
      break;
    case AtomicFetchSubOp:
      ATOMIC_OP_BODY(sub64FromMemory);
      break;
    case AtomicFetchAndOp:
      ATOMIC_OP_BODY(and64FromMemory);
      break;
    case AtomicFetchOrOp:
      ATOMIC_OP_BODY(or64FromMemory);
      break;
    case AtomicFetchXorOp:
      ATOMIC_OP_BODY(xor64FromMemory);
      break;
    default:
      MOZ_CRASH();
  }

#undef ATOMIC_OP_BODY
}

void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
                                         AtomicOp op, const Address& value,
                                         const Address& mem, Register64 temp,
                                         Register64 output) {
  AtomicFetchOp64(*this, &access, op, value, mem, temp, output);
}

void MacroAssembler::wasmAtomicFetchOp64(const wasm::MemoryAccessDesc& access,
                                         AtomicOp op, const Address& value,
                                         const BaseIndex& mem, Register64 temp,
                                         Register64 output) {
  AtomicFetchOp64(*this, &access, op, value, mem, temp, output);
}

void MacroAssembler::wasmTruncateDoubleToUInt32(FloatRegister input,
                                                Register output,
                                                bool isSaturating,
                                                Label* oolEntry) {
  Label done;
  vcvttsd2si(input, output);
  branch32(Assembler::Condition::NotSigned, output, Imm32(0), &done);

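  // The input did not convert into [0, 2^31), but it may still be a valid
  // uint32 in [2^31, 2^32). Bias it down by 2^31 (adding double(INT32_MIN)),
  // convert again, and restore the top bit with the OR below. If even the
  // biased value converts to a negative result, the input is genuinely out of
  // range and we take the out-of-line path.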
  ScratchDoubleScope fpscratch(*this);
  loadConstantDouble(double(int32_t(0x80000000)), fpscratch);
  addDouble(input, fpscratch);
  vcvttsd2si(fpscratch, output);

  branch32(Assembler::Condition::Signed, output, Imm32(0), oolEntry);
  or32(Imm32(0x80000000), output);

  bind(&done);
}

void MacroAssembler::wasmTruncateFloat32ToUInt32(FloatRegister input,
                                                 Register output,
                                                 bool isSaturating,
                                                 Label* oolEntry) {
  Label done;
  vcvttss2si(input, output);
  branch32(Assembler::Condition::NotSigned, output, Imm32(0), &done);

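  // Same rebiasing trick as wasmTruncateDoubleToUInt32 above, using the
  // float32 flavor of each instruction.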
  ScratchFloat32Scope fpscratch(*this);
  loadConstantFloat32(float(int32_t(0x80000000)), fpscratch);
  addFloat32(input, fpscratch);
  vcvttss2si(fpscratch, output);

  branch32(Assembler::Condition::Signed, output, Imm32(0), oolEntry);
  or32(Imm32(0x80000000), output);

  bind(&done);
}

void MacroAssembler::wasmTruncateDoubleToInt64(
    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
    Label* oolRejoin, FloatRegister tempReg) {
  Label ok;
  Register temp = output.high;

  reserveStack(2 * sizeof(int32_t));
  storeDouble(input, Operand(esp, 0));

  truncateDoubleToInt64(Address(esp, 0), Address(esp, 0), temp);
  load64(Address(esp, 0), output);

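  // The truncation helper stores INT64_MIN (0x8000000000000000) when the input
  // is out of range or NaN. That bit pattern is also the legitimate result for
  // exactly -2^63, so when we see it (low word zero, high word INT32_MIN,
  // detected via the signed overflow of INT32_MIN - 1 below) we defer to the
  // out-of-line path to tell the two cases apart.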
  cmpl(Imm32(0), Operand(esp, 0));
  j(Assembler::NotEqual, &ok);

  cmpl(Imm32(1), Operand(esp, 4));
  j(Assembler::Overflow, oolEntry);

  bind(&ok);
  bind(oolRejoin);

  freeStack(2 * sizeof(int32_t));
}

void MacroAssembler::wasmTruncateFloat32ToInt64(
    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
    Label* oolRejoin, FloatRegister tempReg) {
  Label ok;
  Register temp = output.high;

  reserveStack(2 * sizeof(int32_t));
  storeFloat32(input, Operand(esp, 0));

  truncateFloat32ToInt64(Address(esp, 0), Address(esp, 0), temp);
  load64(Address(esp, 0), output);

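  // Same INT64_MIN sentinel check as in wasmTruncateDoubleToInt64 above.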
  cmpl(Imm32(0), Operand(esp, 0));
  j(Assembler::NotEqual, &ok);

  cmpl(Imm32(1), Operand(esp, 4));
  j(Assembler::Overflow, oolEntry);

  bind(&ok);
  bind(oolRejoin);

  freeStack(2 * sizeof(int32_t));
}

void MacroAssembler::wasmTruncateDoubleToUInt64(
    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
    Label* oolRejoin, FloatRegister tempReg) {
  Label fail, convert;
  Register temp = output.high;

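  // Fast path: when the input is in uint64 range we convert it in place on the
  // stack. Otherwise we branch to the OOL code. In saturating mode the OOL
  // code computes the final result and rejoins after the conversion below; in
  // non-saturating mode it only range-checks (trapping on failure) and rejoins
  // before the conversion. setFramePushed keeps the assembler's stack-depth
  // bookkeeping consistent across the two join points.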
  // Make sure input fits in uint64.
  reserveStack(2 * sizeof(int32_t));
  storeDouble(input, Operand(esp, 0));
  branchDoubleNotInUInt64Range(Address(esp, 0), temp, &fail);
  size_t stackBeforeBranch = framePushed();
  jump(&convert);

  bind(&fail);
  freeStack(2 * sizeof(int32_t));
  jump(oolEntry);
  if (isSaturating) {
    // The OOL path computes the right values.
    setFramePushed(stackBeforeBranch);
  } else {
    // The OOL path just checks the input values.
    bind(oolRejoin);
    reserveStack(2 * sizeof(int32_t));
    storeDouble(input, Operand(esp, 0));
  }

  // Convert the double to uint64.
  bind(&convert);
  truncateDoubleToUInt64(Address(esp, 0), Address(esp, 0), temp, tempReg);

  // Load value into int64 register.
  load64(Address(esp, 0), output);
  freeStack(2 * sizeof(int32_t));

  if (isSaturating) {
    bind(oolRejoin);
  }
}

void MacroAssembler::wasmTruncateFloat32ToUInt64(
    FloatRegister input, Register64 output, bool isSaturating, Label* oolEntry,
    Label* oolRejoin, FloatRegister tempReg) {
  Label fail, convert;
  Register temp = output.high;

  // Make sure input fits in uint64.
  reserveStack(2 * sizeof(int32_t));
  storeFloat32(input, Operand(esp, 0));
  branchFloat32NotInUInt64Range(Address(esp, 0), temp, &fail);
  size_t stackBeforeBranch = framePushed();
  jump(&convert);

  bind(&fail);
  freeStack(2 * sizeof(int32_t));
  jump(oolEntry);
  if (isSaturating) {
    // The OOL path computes the right values.
    setFramePushed(stackBeforeBranch);
  } else {
    // The OOL path just checks the input values.
    bind(oolRejoin);
    reserveStack(2 * sizeof(int32_t));
    storeFloat32(input, Operand(esp, 0));
  }

  // Convert the float to uint64.
  bind(&convert);
  truncateFloat32ToUInt64(Address(esp, 0), Address(esp, 0), temp, tempReg);

  // Load value into int64 register.
  load64(Address(esp, 0), output);
  freeStack(2 * sizeof(int32_t));

  if (isSaturating) {
    bind(oolRejoin);
  }
}

// ========================================================================
// Primitive atomic operations.

void MacroAssembler::atomicLoad64(const Synchronization&, const Address& mem,
                                  Register64 temp, Register64 output) {
  AtomicLoad64(*this, nullptr, mem, temp, output);
}

void MacroAssembler::atomicLoad64(const Synchronization&, const BaseIndex& mem,
                                  Register64 temp, Register64 output) {
  AtomicLoad64(*this, nullptr, mem, temp, output);
}

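// Without a 64-bit integer register store on x86-32, atomicStore64 below is
// implemented as an atomic exchange whose previous value is discarded into
// `temp`.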
void MacroAssembler::atomicStore64(const Synchronization&, const Address& mem,
                                   Register64 value, Register64 temp) {
  AtomicExchange64(*this, nullptr, mem, value, temp);
}

void MacroAssembler::atomicStore64(const Synchronization&, const BaseIndex& mem,
                                   Register64 value, Register64 temp) {
  AtomicExchange64(*this, nullptr, mem, value, temp);
}

void MacroAssembler::compareExchange64(const Synchronization&,
                                       const Address& mem, Register64 expected,
                                       Register64 replacement,
                                       Register64 output) {
  CompareExchange64(*this, nullptr, mem, expected, replacement, output);
}

void MacroAssembler::compareExchange64(const Synchronization&,
                                       const BaseIndex& mem,
                                       Register64 expected,
                                       Register64 replacement,
                                       Register64 output) {
  CompareExchange64(*this, nullptr, mem, expected, replacement, output);
}

void MacroAssembler::atomicExchange64(const Synchronization&,
                                      const Address& mem, Register64 value,
                                      Register64 output) {
  AtomicExchange64(*this, nullptr, mem, value, output);
}

void MacroAssembler::atomicExchange64(const Synchronization&,
                                      const BaseIndex& mem, Register64 value,
                                      Register64 output) {
  AtomicExchange64(*this, nullptr, mem, value, output);
}

void MacroAssembler::atomicFetchOp64(const Synchronization&, AtomicOp op,
                                     const Address& value, const Address& mem,
                                     Register64 temp, Register64 output) {
  AtomicFetchOp64(*this, nullptr, op, value, mem, temp, output);
}

void MacroAssembler::atomicFetchOp64(const Synchronization&, AtomicOp op,
                                     const Address& value, const BaseIndex& mem,
                                     Register64 temp, Register64 output) {
  AtomicFetchOp64(*this, nullptr, op, value, mem, temp, output);
}

// ========================================================================
// Convert floating point.

bool MacroAssembler::convertUInt64ToDoubleNeedsTemp() { return HasSSE3(); }

void MacroAssembler::convertUInt64ToDouble(Register64 src, FloatRegister dest,
                                           Register temp) {
  // SUBPD needs SSE2, HADDPD needs SSE3.
  if (!HasSSE3()) {
    MOZ_ASSERT(temp == Register::Invalid());

    // Zero the dest register to break dependencies, see convertInt32ToDouble.
    zeroDouble(dest);

    Push(src.high);
    Push(src.low);
    fild(Operand(esp, 0));

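    // Layout note: pushing src.high then src.low leaves the value as a
    // little-endian int64 at esp, which FILD loads as a *signed* 64-bit
    // integer. If the top bit of src was set, the loaded value is therefore
    // 2^64 too small, and the branch below compensates by adding 2^64 on the
    // x87 stack before storing the result.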
    Label notNegative;
    branch32(Assembler::NotSigned, src.high, Imm32(0), &notNegative);
    double add_constant = 18446744073709551616.0;  // 2^64
    store64(Imm64(mozilla::BitwiseCast<uint64_t>(add_constant)),
            Address(esp, 0));
    fld(Operand(esp, 0));
    faddp();
    bind(&notNegative);

    fstp(Operand(esp, 0));
    vmovsd(Address(esp, 0), dest);
    freeStack(2 * sizeof(intptr_t));
    return;
  }

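  // SSE3 path. The uint64 is assembled from two exactly representable doubles,
  // 2^52 + LO and 2^84 + HI * 2^32, whose biases are then subtracted and whose
  // halves are summed. A scalar sketch of the same idea (for reference only;
  // the names below are illustrative, not part of this file):
  //
  //   double lo = BitwiseCast<double>(0x4330000000000000ULL | (src & 0xFFFFFFFF));
  //   double hi = BitwiseCast<double>(0x4530000000000000ULL | (src >> 32));
  //   double result = (hi - 0x1.0p84) + (lo - 0x1.0p52);  // == double(src)
  //
  // The SIMD code below performs both bias subtractions with one VSUBPD and
  // the final addition with VHADDPD.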
  ScratchSimd128Scope scratch(*this);

  // The following operations use the entire 128 bits of the dest XMM register;
  // the upper 64 bits are currently free whenever we have access to the lower
  // 64 bits.
  MOZ_ASSERT(dest.size() == 8);
  FloatRegister dest128 =
      FloatRegister(dest.encoding(), FloatRegisters::Simd128);

  // Assume that src is represented as follows:
  //   src      = 0x HHHHHHHH LLLLLLLL

  // Move src to dest (=dest128) and ScratchInt32x4Reg (=scratch):
  //   dest     = 0x 00000000 00000000  00000000 LLLLLLLL
  //   scratch  = 0x 00000000 00000000  00000000 HHHHHHHH
  vmovd(src.low, dest128);
  vmovd(src.high, scratch);

  // Unpack and interleave dest and scratch to dest:
  //   dest     = 0x 00000000 00000000  HHHHHHHH LLLLLLLL
  vpunpckldq(scratch, dest128, dest128);

  // Unpack and interleave dest and a constant C1 to dest:
  //   C1       = 0x 00000000 00000000  45300000 43300000
  //   dest     = 0x 45300000 HHHHHHHH  43300000 LLLLLLLL
  // Here, each 64-bit part of dest represents the following double:
  //   HI(dest) = 0x 1.00000HHHHHHHH * 2**84 == 2**84 + 0x HHHHHHHH 00000000
  //   LO(dest) = 0x 1.00000LLLLLLLL * 2**52 == 2**52 + 0x 00000000 LLLLLLLL
  // See the overview comment above for the details.
  static const int32_t CST1[4] = {
      0x43300000,
      0x45300000,
      0x0,
      0x0,
  };

  loadConstantSimd128Int(SimdConstant::CreateX4(CST1), scratch);
  vpunpckldq(scratch, dest128, dest128);

  // Subtract a constant C2 from dest, for each 64-bit part:
  //   C2       = 0x 45300000 00000000  43300000 00000000
  // Here, each 64-bit part of C2 represents the following double:
  //   HI(C2)   = 0x 1.0000000000000 * 2**84 == 2**84
  //   LO(C2)   = 0x 1.0000000000000 * 2**52 == 2**52
  // After the operation each 64-bit part of dest represents the following:
  //   HI(dest) = double(0x HHHHHHHH 00000000)
  //   LO(dest) = double(0x 00000000 LLLLLLLL)
  static const int32_t CST2[4] = {
      0x0,
      0x43300000,
      0x0,
      0x45300000,
  };

  loadConstantSimd128Int(SimdConstant::CreateX4(CST2), scratch);
  vsubpd(Operand(scratch), dest128, dest128);

  // Add HI(dest) and LO(dest) as doubles and store the sum into LO(dest):
  //   LO(dest) = double(0x HHHHHHHH 00000000) + double(0x 00000000 LLLLLLLL)
  //            = double(0x HHHHHHHH LLLLLLLL)
  //            = double(src)
  vhaddpd(dest128, dest128);
}

void MacroAssembler::convertInt64ToDouble(Register64 input,
                                          FloatRegister output) {
  // Zero the output register to break dependencies, see convertInt32ToDouble.
  zeroDouble(output);

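  // FILD interprets its memory operand as a signed 64-bit integer, so unlike
  // convertUInt64ToDouble above no bias correction is needed here.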
  Push(input.high);
  Push(input.low);
  fild(Operand(esp, 0));

  fstp(Operand(esp, 0));
  vmovsd(Address(esp, 0), output);
  freeStack(2 * sizeof(intptr_t));
}

void MacroAssembler::convertUInt64ToFloat32(Register64 input,
                                            FloatRegister output,
                                            Register temp) {
  // Zero the dest register to break dependencies, see convertInt32ToDouble.
  zeroDouble(output);

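  // The conversion below goes through the x87 FPU. Its precision-control field
  // (bits 8-9 of the control word, the 0x300 mask below) is temporarily set to
  // 64-bit significands so that the FILD result and the optional +2^64
  // adjustment are not rounded to double before the final store as float32,
  // which could otherwise double-round.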
  // Set the FPU precision to 80 bits.
  reserveStack(2 * sizeof(intptr_t));
  fnstcw(Operand(esp, 0));
  load32(Operand(esp, 0), temp);
  orl(Imm32(0x300), temp);
  store32(temp, Operand(esp, sizeof(intptr_t)));
  fldcw(Operand(esp, sizeof(intptr_t)));

  Push(input.high);
  Push(input.low);
  fild(Operand(esp, 0));

  Label notNegative;
  branch32(Assembler::NotSigned, input.high, Imm32(0), &notNegative);
  double add_constant = 18446744073709551616.0;  // 2^64
  uint64_t add_constant_u64 = mozilla::BitwiseCast<uint64_t>(add_constant);
  store64(Imm64(add_constant_u64), Address(esp, 0));

  fld(Operand(esp, 0));
  faddp();
  bind(&notNegative);

  fstp32(Operand(esp, 0));
  vmovss(Address(esp, 0), output);
  freeStack(2 * sizeof(intptr_t));

  // Restore FPU precision to the initial value.
  fldcw(Operand(esp, 0));
  freeStack(2 * sizeof(intptr_t));
}

void MacroAssembler::convertInt64ToFloat32(Register64 input,
                                           FloatRegister output) {
  // Zero the output register to break dependencies, see convertInt32ToDouble.
  zeroDouble(output);

  Push(input.high);
  Push(input.low);
  fild(Operand(esp, 0));

  fstp32(Operand(esp, 0));
  vmovss(Address(esp, 0), output);
  freeStack(2 * sizeof(intptr_t));
}

void MacroAssembler::convertIntPtrToDouble(Register src, FloatRegister dest) {
  convertInt32ToDouble(src, dest);
}

void MacroAssembler::PushBoxed(FloatRegister reg) { Push(reg); }

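// On 32-bit x86 a patchable mov with a 32-bit immediate can reach any address,
// so the "near" address move carries no extra constraint here.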
CodeOffset MacroAssembler::moveNearAddressWithPatch(Register dest) {
  return movWithPatch(ImmPtr(nullptr), dest);
}

void MacroAssembler::patchNearAddressMove(CodeLocationLabel loc,
                                          CodeLocationLabel target) {
  PatchDataWithValueCheck(loc, ImmPtr(target.raw()), ImmPtr(nullptr));
}

//}}} check_macroassembler_style