1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/auxv.h>
28 
29 #ifdef __ARCH__
30 #define ENABLE_STATIC_FACILITY_DETECTION 1
31 #else
32 #define ENABLE_STATIC_FACILITY_DETECTION 0
33 #endif
34 #define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35 
/* Return the human-readable platform name ("s390x" with the
   SLJIT_CPUINFO suffix appended at compile time). */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "s390x" SLJIT_CPUINFO;
}
40 
/* Instructions. Wide enough to hold a 6-byte instruction plus tag bits. */
typedef sljit_uw sljit_ins;

/* Instruction tags (most significant halfword). */
static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;

/* Maps SLJIT register indices to hardware GPR numbers; used via gpr().
   The two trailing entries are the reserved temporaries tmp0/tmp1
   (r0/r1, see the register comments below). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
};
50 
51 /* there are also a[2-15] available, but they are slower to access and
52  * their use is limited as mundaym explained:
53  *   https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
54  */
55 
56 /* General Purpose Registers [0-15]. */
57 typedef sljit_uw sljit_gpr;
58 
59 /*
60  * WARNING
61  * the following code is non standard and should be improved for
62  * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
63  * registers because r0 and r1 are the ABI recommended volatiles.
64  * there is a gpr() function that maps sljit to physical register numbers
65  * that should be used instead of the usual index into reg_map[] and
66  * will be retired ASAP (TODO: carenas)
67  */
68 
69 static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
70 static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
71 static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
72 static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
73 static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
74 static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
75 static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
76 static const sljit_gpr r7 = 7;		/* reg_map[6] */
77 static const sljit_gpr r8 = 8;		/* reg_map[7] */
78 static const sljit_gpr r9 = 9;		/* reg_map[8] */
79 static const sljit_gpr r10 = 10;	/* reg_map[9] */
80 static const sljit_gpr r11 = 11;	/* reg_map[10] */
81 static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
82 static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
83 static const sljit_gpr r14 = 14;	/* reg_map[0]: return address and flag register */
84 static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
85 
86 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
87 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
88 /* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
89  *                like we do know might be faster though, reserve?
90  */
91 
92 /* TODO(carenas): should be named TMP_REG[1-2] for consistency */
93 #define tmp0	r0
94 #define tmp1	r1
95 
96 /* TODO(carenas): flags should move to a different register so that
97  *                link register doesn't need to change
98  */
99 
100 /* Link registers. The normal link register is r14, but since
101    we use that for flags we need to use r0 instead to do fast
102    calls so that flags are preserved. */
103 static const sljit_gpr link_r = 14;     /* r14 */
104 static const sljit_gpr fast_link_r = 0; /* r0 */
105 
106 /* Flag register layout:
107 
108    0               32  33  34      36      64
109    +---------------+---+---+-------+-------+
110    |      ZERO     | 0 | 0 |  C C  |///////|
111    +---------------+---+---+-------+-------+
112 */
113 static const sljit_gpr flag_r = 14; /* r14 */
114 
/* Backend-specific constant record; extends sljit_const with the value
   needed to emit the literal pool. */
struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
119 
120 /* Convert SLJIT register to hardware register. */
/* Convert SLJIT register to hardware register. */
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
{
	/* Bounds-check the index in debug builds; reg_map covers the
	   SLJIT registers plus the reserved temporaries. */
	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
	return reg_map[r];
}
126 
127 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)128 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
129 {
130 	/* keep faulting instructions */
131 	if (ins == 0)
132 		 return 2;
133 
134 	if ((ins & 0x00000000ffffL) == ins)
135 		 return 2;
136 	if ((ins & 0x0000ffffffffL) == ins)
137 		 return 4;
138 	if ((ins & 0xffffffffffffL) == ins)
139 		 return 6;
140 
141 	SLJIT_UNREACHABLE();
142 	return (sljit_uw)-1;
143 }
144 
/* Append one (possibly tagged) instruction word to the compiler's
   buffer. compiler->size counts buffered instruction words, not bytes;
   the byte size is computed later via sizeof_ins(). */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ibuf);
	*ibuf = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
153 
encode_inst(void ** ptr,sljit_ins ins)154 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
155 {
156 	sljit_u16 *ibuf = (sljit_u16 *)*ptr;
157 	sljit_uw size = sizeof_ins(ins);
158 
159 	SLJIT_ASSERT((size & 6) == size);
160 	switch (size) {
161 	case 6:
162 		*ibuf++ = (sljit_u16)(ins >> 32);
163 		/* fallthrough */
164 	case 4:
165 		*ibuf++ = (sljit_u16)(ins >> 16);
166 		/* fallthrough */
167 	case 2:
168 		*ibuf++ = (sljit_u16)(ins);
169 	}
170 	*ptr = (void*)ibuf;
171 	return SLJIT_SUCCESS;
172 }
173 
174 #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
175 	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD_SUB | SLJIT_CURRENT_FLAGS_COMPARE)) == SLJIT_CURRENT_FLAGS_ADD_SUB)
176 
177 /* Map the given type to a 4-bit condition code mask. */
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */

	switch (type) {
	case SLJIT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* deliberately shadows the outer 'type' with the flag type */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return cc0;
			if (type == SLJIT_OVERFLOW)
				return (cc0 | cc3);
			return (cc0 | cc2);
		}
		/* fallthrough */

	case SLJIT_EQUAL_F64:
		return cc0;

	case SLJIT_NOT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* deliberately shadows the outer 'type' with the flag type */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return (cc1 | cc2 | cc3);
			if (type == SLJIT_OVERFLOW)
				return (cc1 | cc2);
			return (cc1 | cc3);
		}
		/* fallthrough */

	case SLJIT_NOT_EQUAL_F64:
		return (cc1 | cc2 | cc3);

	case SLJIT_LESS:
		return cc1;

	case SLJIT_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_GREATER:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return cc2;
		return cc3;

	case SLJIT_LESS_EQUAL:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return (cc0 | cc1);
		return (cc0 | cc1 | cc2);

	case SLJIT_SIG_LESS:
	case SLJIT_LESS_F64:
		return cc1;

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return (cc0 | cc1);

	case SLJIT_SIG_GREATER:
		/* Overflow is considered greater, see SLJIT_SUB. */
		return cc2 | cc3;

	case SLJIT_SIG_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_UNORDERED_F64:
		return cc3;

	case SLJIT_NOT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_ORDERED_F64:
		return (cc0 | cc1 | cc2);

	case SLJIT_GREATER_F64:
		return cc2;

	case SLJIT_GREATER_EQUAL_F64:
		return (cc0 | cc2);
	}

	SLJIT_UNREACHABLE();
	return (sljit_u8)-1;
}
266 
267 /* Facility to bit index mappings.
268    Note: some facilities share the same bit index. */
269 typedef sljit_uw facility_bit;
270 #define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
271 #define FAST_LONG_DISPLACEMENT_FACILITY 19
272 #define EXTENDED_IMMEDIATE_FACILITY 21
273 #define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
274 #define DISTINCT_OPERAND_FACILITY 45
275 #define HIGH_WORD_FACILITY 45
276 #define POPULATION_COUNT_FACILITY 45
277 #define LOAD_STORE_ON_CONDITION_1_FACILITY 45
278 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
279 #define LOAD_STORE_ON_CONDITION_2_FACILITY 53
280 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
281 #define VECTOR_FACILITY 129
282 #define VECTOR_ENHANCEMENTS_1_FACILITY 135
283 
284 /* Report whether a facility is known to be present due to the compiler
285    settings. This function should always be compiled to a constant
286    value given a constant argument. */
static SLJIT_INLINE int have_facility_static(facility_bit x)
{
#if ENABLE_STATIC_FACILITY_DETECTION
	/* __ARCH__ encodes the minimum machine level the compiler was told
	   to target; each case reports whether that level guarantees the
	   facility. */
	switch (x) {
	case FAST_LONG_DISPLACEMENT_FACILITY:
		return (__ARCH__ >=  6 /* z990 */);
	case EXTENDED_IMMEDIATE_FACILITY:
	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
		return (__ARCH__ >=  7 /* z9-109 */);
	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
		return (__ARCH__ >=  8 /* z10 */);
	case DISTINCT_OPERAND_FACILITY:
		return (__ARCH__ >=  9 /* z196 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
		return (__ARCH__ >= 10 /* zEC12 */);
	case LOAD_STORE_ON_CONDITION_2_FACILITY:
	case VECTOR_FACILITY:
		return (__ARCH__ >= 11 /* z13 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
	case VECTOR_ENHANCEMENTS_1_FACILITY:
		return (__ARCH__ >= 12 /* z14 */);
	default:
		SLJIT_UNREACHABLE();
	}
#endif
	return 0;
}
314 
get_hwcap()315 static SLJIT_INLINE unsigned long get_hwcap()
316 {
317 	static unsigned long hwcap = 0;
318 	if (SLJIT_UNLIKELY(!hwcap)) {
319 		hwcap = getauxval(AT_HWCAP);
320 		SLJIT_ASSERT(hwcap != 0);
321 	}
322 	return hwcap;
323 }
324 
have_stfle()325 static SLJIT_INLINE int have_stfle()
326 {
327 	if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
328 		return 1;
329 
330 	return (get_hwcap() & HWCAP_S390_STFLE);
331 }
332 
333 /* Report whether the given facility is available. This function always
334    performs a runtime check. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	/* Facility list is captured once by STFLE; bits[0] != 0 doubles as
	   the "already initialized" flag (the code asserts bit word 0 is
	   non-zero after STFLE has run).
	   NOTE(review): the lazy initialization is not thread-safe on the
	   first call — confirm callers serialize code generation. */
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	const sljit_uw word_index = x >> 6;
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* r0 must hold the number of doublewords to store minus 1. */
		__asm__ __volatile__ (
			"lgr   %%r0, %0;"
			"stfle 0(%1);"
			/* outputs  */:
			/* inputs   */: "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */: "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
364 
365 #define HAVE_FACILITY(name, bit) \
366 static SLJIT_INLINE int name() \
367 { \
368 	static int have = -1; \
369 	/* Static check first. May allow the function to be optimized away. */ \
370 	if (have_facility_static(bit)) \
371 		have = 1; \
372 	else if (SLJIT_UNLIKELY(have < 0)) \
373 		have = have_facility_dynamic(bit) ? 1 : 0; \
374 \
375 	return have; \
376 }
377 
HAVE_FACILITY(have_eimm,EXTENDED_IMMEDIATE_FACILITY)378 HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
379 HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
380 HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
381 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
382 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
383 HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
384 #undef HAVE_FACILITY
385 
386 #define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
387 #define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)
388 
389 #define CHECK_SIGNED(v, bitlen) \
390 	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))
391 
392 #define is_s8(d)	CHECK_SIGNED((d), 8)
393 #define is_s16(d)	CHECK_SIGNED((d), 16)
394 #define is_s20(d)	CHECK_SIGNED((d), 20)
395 #define is_s32(d)	((d) == (sljit_s32)(d))
396 
397 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
398 {
399 	SLJIT_ASSERT(is_s20(d));
400 
401 	sljit_uw dh = (d >> 12) & 0xff;
402 	sljit_uw dl = (d << 8) & 0xfff00;
403 	return (dh | dl) << 8;
404 }
405 
406 /* TODO(carenas): variadic macro is not strictly needed */
407 #define SLJIT_S390X_INSTRUCTION(op, ...) \
408 static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
409 
410 /* RR form instructions. */
/* 2-byte RR format: opcode(8) | R1(4) | R2(4). */
#define SLJIT_S390X_RR(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* AND */
SLJIT_S390X_RR(nr,   0x1400)

/* BRANCH AND SAVE */
SLJIT_S390X_RR(basr, 0x0d00)

/* BRANCH ON CONDITION */
SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */

/* DIVIDE */
SLJIT_S390X_RR(dr,   0x1d00)

/* EXCLUSIVE OR */
SLJIT_S390X_RR(xr,   0x1700)

/* LOAD */
SLJIT_S390X_RR(lr,   0x1800)

/* LOAD COMPLEMENT */
SLJIT_S390X_RR(lcr,  0x1300)

/* OR */
SLJIT_S390X_RR(or,   0x1600)

#undef SLJIT_S390X_RR
442 
443 /* RRE form instructions */
/* 4-byte RRE format: opcode(16) | unused(8) | R1(4) | R2(4). */
#define SLJIT_S390X_RRE(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* AND */
SLJIT_S390X_RRE(ngr,   0xb9800000)

/* DIVIDE LOGICAL */
SLJIT_S390X_RRE(dlr,   0xb9970000)
SLJIT_S390X_RRE(dlgr,  0xb9870000)

/* DIVIDE SINGLE */
SLJIT_S390X_RRE(dsgr,  0xb90d0000)

/* EXCLUSIVE OR */
SLJIT_S390X_RRE(xgr,   0xb9820000)

/* LOAD */
SLJIT_S390X_RRE(lgr,   0xb9040000)
SLJIT_S390X_RRE(lgfr,  0xb9140000)

/* LOAD BYTE */
SLJIT_S390X_RRE(lbr,   0xb9260000)
SLJIT_S390X_RRE(lgbr,  0xb9060000)

/* LOAD COMPLEMENT */
SLJIT_S390X_RRE(lcgr,  0xb9030000)

/* LOAD HALFWORD */
SLJIT_S390X_RRE(lhr,   0xb9270000)
SLJIT_S390X_RRE(lghr,  0xb9070000)

/* LOAD LOGICAL */
SLJIT_S390X_RRE(llgfr, 0xb9160000)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RRE(llcr,  0xb9940000)
SLJIT_S390X_RRE(llgcr, 0xb9840000)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RRE(llhr,  0xb9950000)
SLJIT_S390X_RRE(llghr, 0xb9850000)

/* MULTIPLY LOGICAL */
SLJIT_S390X_RRE(mlgr,  0xb9860000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RRE(msgfr, 0xb91c0000)

/* OR */
SLJIT_S390X_RRE(ogr,   0xb9810000)

/* SUBTRACT */
SLJIT_S390X_RRE(sgr,   0xb9090000)

#undef SLJIT_S390X_RRE
502 
503 /* RI-a form instructions */
/* 4-byte RI-a format: opcode(8) | R1(4) | opcode(4) | I2(16). */
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	return (pattern) | ((reg & 0xf) << 20) | (imm & 0xffff); \
}

/* ADD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)

/* LOAD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)

/* MULTIPLY HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)

/* OR IMMEDIATE */
SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)

#undef SLJIT_S390X_RIA
531 
532 /* RIL-a form instructions (requires extended immediate facility) */
/* 6-byte RIL-a format: opcode(8) | R1(4) | opcode(4) | I2(32).
   All emitters assert the extended-immediate facility. */
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	SLJIT_ASSERT(have_eimm()); \
	return (pattern) | ((sljit_ins)(reg & 0xf) << 36) | (imm & 0xffffffff); \
}

/* ADD IMMEDIATE */
SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)

/* ADD IMMEDIATE HIGH */
SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* AND IMMEDIATE */
SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)

/* EXCLUSIVE OR IMMEDIATE */
SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)

/* INSERT IMMEDIATE */
SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)

/* LOAD IMMEDIATE */
SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)

/* SUBTRACT LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(slfi,  0xc20500000000, sljit_u32)

#undef SLJIT_S390X_RILA
567 
568 /* RX-a form instructions */
/* 4-byte RX-a format: opcode(8) | R1(4) | X2(4) | B2(4) | D2(12).
   The displacement is unsigned and limited to 12 bits. */
#define SLJIT_S390X_RXA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_u16 d, sljit_gpr x, sljit_gpr b) \
{ \
	sljit_ins ri, xi, bi, di; \
\
	SLJIT_ASSERT((d & 0xfff) == d); \
	ri = (sljit_ins)(r & 0xf) << 20; \
	xi = (sljit_ins)(x & 0xf) << 16; \
	bi = (sljit_ins)(b & 0xf) << 12; \
	di = (sljit_ins)(d & 0xfff); \
\
	return (pattern) | ri | xi | bi | di; \
}

/* LOAD */
SLJIT_S390X_RXA(l,   0x58000000)

/* LOAD ADDRESS */
SLJIT_S390X_RXA(la,  0x41000000)

/* LOAD HALFWORD */
SLJIT_S390X_RXA(lh,  0x48000000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXA(ms,  0x71000000)

/* STORE */
SLJIT_S390X_RXA(st,  0x50000000)

/* STORE CHARACTER */
SLJIT_S390X_RXA(stc, 0x42000000)

/* STORE HALFWORD */
SLJIT_S390X_RXA(sth, 0x40000000)

#undef SLJIT_S390X_RXA
605 
606 /* RXY-a instructions */
/* 6-byte RXY-a format: opcode | R1(4) | X2(4) | B2(4) | D2(20 signed,
   split into DL/DH by disp_s20()). `cond` asserts the facility the
   instruction requires. */
#define SLJIT_S390X_RXYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	sljit_ins ri, xi, bi, di; \
\
	SLJIT_ASSERT(cond); \
	ri = (sljit_ins)(r & 0xf) << 36; \
	xi = (sljit_ins)(x & 0xf) << 32; \
	bi = (sljit_ins)(b & 0xf) << 28; \
	di = disp_s20(d); \
\
	return (pattern) | ri | xi | bi | di; \
}

/* LOAD */
SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)

/* LOAD BYTE */
SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())

/* LOAD HALFWORD */
SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)

/* LOAD LOGICAL */
SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)

/* STORE */
SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)

/* STORE CHARACTER */
SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())

/* STORE HALFWORD */
SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())

#undef SLJIT_S390X_RXYA
660 
661 /* RSY-a instructions */
/* 6-byte RSY-a format: opcode | R1(4) | R3(4) | B2(4) | D2(20 signed).
   `cond` asserts the facility the instruction requires. */
#define SLJIT_S390X_RSYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_sw d, sljit_gpr b) \
{ \
	sljit_ins r1, r3, b2, d2; \
\
	SLJIT_ASSERT(cond); \
	r1 = (sljit_ins)(dst & 0xf) << 36; \
	r3 = (sljit_ins)(src & 0xf) << 32; \
	b2 = (sljit_ins)(b & 0xf) << 28; \
	d2 = disp_s20(d); \
\
	return (pattern) | r1 | r3 | b2 | d2; \
}

/* LOAD MULTIPLE */
SLJIT_S390X_RSYA(lmg,   0xeb0000000004, 1)

/* SHIFT LEFT LOGICAL */
SLJIT_S390X_RSYA(sllg,  0xeb000000000d, 1)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSYA(srag,  0xeb000000000a, 1)

/* STORE MULTIPLE */
SLJIT_S390X_RSYA(stmg,  0xeb0000000024, 1)

#undef SLJIT_S390X_RSYA
689 
690 /* RIE-f instructions (require general-instructions-extension facility) */
/* 6-byte RIE-f format: opcode | R1(4) | R2(4) | I3(8) | I4(8) | I5(8).
   All emitters assert the general-instructions-extension facility. */
#define SLJIT_S390X_RIEF(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
{ \
	sljit_ins r1, r2, i3, i4, i5; \
\
	SLJIT_ASSERT(have_genext()); \
	r1 = (sljit_ins)(dst & 0xf) << 36; \
	r2 = (sljit_ins)(src & 0xf) << 32; \
	i3 = (sljit_ins)start << 24; \
	i4 = (sljit_ins)end << 16; \
	i5 = (sljit_ins)rot << 8; \
\
	return (pattern) | r1 | r2 | i3 | i4 | i5; \
}

/* ROTATE THEN AND SELECTED BITS */
/* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */

/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
/* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */

/* ROTATE THEN OR SELECTED BITS */
SLJIT_S390X_RIEF(rosbg,  0xec0000000056)

/* ROTATE THEN INSERT SELECTED BITS */
/* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */

/* ROTATE THEN INSERT SELECTED BITS HIGH */
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)

/* ROTATE THEN INSERT SELECTED BITS LOW */
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */

#undef SLJIT_S390X_RIEF
726 
727 /* RRF-c instructions (require load/store-on-condition 1 facility) */
/* 4-byte RRF-c format: opcode(16) | M3(4) | unused(4) | R1(4) | R2(4).
   All emitters assert the load/store-on-condition 1 facility. */
#define SLJIT_S390X_RRFC(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
{ \
	sljit_ins r1, r2, m3; \
\
	SLJIT_ASSERT(have_lscond1()); \
	r1 = (sljit_ins)(dst & 0xf) << 4; \
	r2 = (sljit_ins)(src & 0xf); \
	m3 = (sljit_ins)(mask & 0xf) << 12; \
\
	return (pattern) | m3 | r1 | r2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RRFC(locr,  0xb9f20000)
SLJIT_S390X_RRFC(locgr, 0xb9e20000)

#undef SLJIT_S390X_RRFC
746 
747 /* RIE-g instructions (require load/store-on-condition 2 facility) */
/* 6-byte RIE-g format: opcode | R1(4) | M3(4) | I2(16).
   All emitters assert the load/store-on-condition 2 facility. */
#define SLJIT_S390X_RIEG(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
{ \
	sljit_ins r1, m3, i2; \
\
	SLJIT_ASSERT(have_lscond2()); \
	r1 = (sljit_ins)(reg & 0xf) << 36; \
	m3 = (sljit_ins)(mask & 0xf) << 32; \
	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
\
	return (pattern) | r1 | m3 | i2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RIEG(lochi,  0xec0000000042)
SLJIT_S390X_RIEG(locghi, 0xec0000000046)

#undef SLJIT_S390X_RIEG
766 
/* 6-byte RIL-b format: opcode(8) | R1(4) | opcode(4) | RI2(32).
   RI2 is a halfword-relative offset; `cond` asserts the required
   facility. */
#define SLJIT_S390X_RILB(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
{ \
	sljit_ins r1, ri2; \
\
	SLJIT_ASSERT(cond); \
	r1 = (sljit_ins)(reg & 0xf) << 36; \
	ri2 = (sljit_ins)(ri & 0xffffffff); \
\
	return (pattern) | r1 | ri2; \
}

/* BRANCH RELATIVE AND SAVE LONG */
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)

/* LOAD ADDRESS RELATIVE LONG */
SLJIT_S390X_RILB(larl,  0xc00000000000, 1)

/* LOAD RELATIVE LONG */
SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())

#undef SLJIT_S390X_RILB
789 
/* BRANCH ON CONDITION with mask 0xf: an unconditional branch to the
   address in `target`. */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	return 0x07f0 | target;
}
794 
/* BRANCH RELATIVE ON CONDITION; `target` is a signed 16-bit
   halfword-relative offset, `mask` the 4-bit condition mask. */
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
	sljit_ins ri2 = (sljit_ins)target & 0xffff;
	return 0xa7040000L | m1 | ri2;
}
801 
/* BRANCH RELATIVE ON CONDITION LONG; like brc but with a signed
   32-bit halfword-relative offset. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
	sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
	return 0xc00400000000L | m1 | ri2;
}
808 
/* FIND LEFTMOST ONE (requires extended-immediate facility).
   NOTE(review): the ISA requires dst to designate an even/odd register
   pair — confirm at call sites. */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	sljit_ins r1 = ((sljit_ins)dst & 0xf) << 8;
	sljit_ins r2 = ((sljit_ins)src & 0xf);
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000 | r1 | r2;
}
816 
817 /* INSERT PROGRAM MASK */
/* INSERT PROGRAM MASK: copies the condition code and program mask
   into bits 32-39 of dst. */
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
{
	return 0xb2220000 | ((sljit_ins)(dst & 0xf) << 4);
}
822 
823 /* SET PROGRAM MASK */
/* SET PROGRAM MASK: restores the condition code and program mask
   from dst (the inverse of ipm above). */
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
{
	return 0x0400 | ((sljit_ins)(dst & 0xf) << 4);
}
828 
829 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO): risbhg with the
   zero-remaining-bits flag (bit 0x8 of the I4/end field) set. */
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
{
	return risbhg(dst, src, start, 0x8 | end, rot);
}
834 
835 #undef SLJIT_S390X_INSTRUCTION
836 
/* Patch the cached condition code (kept in flag_r via ipm) so that the
   combined zero/overflow state matches the transformation table below.
   The brc distances are hand-counted halfword offsets over the
   following instructions — do not reorder. */
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
{
	/* Condition codes: bits 18 and 19.
	   Transformation:
	     0 (zero and no overflow) : unchanged
	     1 (non-zero and no overflow) : unchanged
	     2 (zero and overflow) : decreased by 1
	     3 (non-zero and overflow) : decreased by 1 if non-zero */
	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_I32_OP) ? 1 : 2) + 2 + 3 + 1))); /* skip fixup when no overflow */
	FAIL_IF(push_inst(compiler, ipm(flag_r)));                                                 /* capture cc into flag_r */
	FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));  /* set cc from the result value */
	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));                                             /* result zero: skip the decrement */
	FAIL_IF(push_inst(compiler, slfi(flag_r, 0x10000000)));                                    /* decrement the stored cc field */
	FAIL_IF(push_inst(compiler, spm(flag_r)));                                                 /* write the adjusted cc back */
	return SLJIT_SUCCESS;
}
853 
854 /* load 64-bit immediate into register without clobbering flags */
/* load 64-bit immediate into register without clobbering flags */
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
{
	/* 4 byte instructions */
	/* sign-extended 16-bit value */
	if (is_s16(v))
		return push_inst(compiler, lghi(target, (sljit_s16)v));

	/* a single non-zero halfword: use the matching zero-extending
	   load-logical-immediate form */
	if ((sljit_uw)v == (v & 0x000000000000ffffU))
		return push_inst(compiler, llill(target, (sljit_u16)v));

	if ((sljit_uw)v == (v & 0x00000000ffff0000U))
		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));

	if ((sljit_uw)v == (v & 0x0000ffff00000000U))
		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));

	if ((sljit_uw)v == (v & 0xffff000000000000U))
		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));

	/* 6 byte instructions (requires extended immediate facility) */
	if (have_eimm()) {
		/* sign-extended 32-bit value */
		if (is_s32(v))
			return push_inst(compiler, lgfi(target, (sljit_s32)v));

		/* a single non-zero word */
		if ((sljit_uw)v == (v & 0x00000000ffffffffU))
			return push_inst(compiler, llilf(target, (sljit_u32)v));

		if ((sljit_uw)v == (v & 0xffffffff00000000U))
			return push_inst(compiler, llihf(target, (sljit_u32)(v >> 32)));

		/* general case: load low word, then insert high word */
		FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
		return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
	}
	/* TODO(mundaym): instruction sequences that don't use extended immediates */
	abort();
}
890 
/* Decomposed D(X,B) memory operand. A base or index of r0 means
   "absent" (r0 reads as zero in address generation, see the r0
   comment above). */
struct addr {
	sljit_gpr base;
	sljit_gpr index;
	sljit_sw  offset;
};
896 
897 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
/* transform memory operand into D(X,B) form with a signed 20-bit offset */
static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr b = (mem & REG_MASK) ? gpr(mem & REG_MASK) : r0;
	sljit_gpr x = r0;

	SLJIT_ASSERT(tmp != r0);

	if (mem & OFFS_REG_MASK) {
		x = gpr(OFFS_REG(mem));
		if (off != 0) {
			/* pre-scale the index into tmp so the offset can be dropped */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, x, off, 0)));
			x = tmp;
			off = 0;
		}
	} else if (!is_s20(off)) {
		/* offset exceeds the signed 20-bit range: materialize it in tmp */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		x = tmp;
		off = 0;
	}

	addr->base = b;
	addr->index = x;
	addr->offset = off;
	return SLJIT_SUCCESS;
}
929 
930 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr b = (mem & REG_MASK) ? gpr(mem & REG_MASK) : r0;
	sljit_gpr x = r0;

	SLJIT_ASSERT(tmp != r0);

	if (mem & OFFS_REG_MASK) {
		x = gpr(OFFS_REG(mem));
		if (off != 0) {
			/* pre-scale the index into tmp so the offset can be dropped */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, x, off, 0)));
			x = tmp;
			off = 0;
		}
	} else if (!is_u12(off)) {
		/* offset exceeds the unsigned 12-bit range: materialize it in tmp */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		x = tmp;
		off = 0;
	}

	addr->base = b;
	addr->index = x;
	addr->offset = off;
	return SLJIT_SUCCESS;
}
962 
/* Expand an instruction encoder over a struct addr (D(X,B) operand). */
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
/* Choose between the short-displacement (i1) and long-displacement (i2) form. */
#define WHEN(cond, r, i1, i2, addr) \
	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
966 
967 /* May clobber tmp1. */
load_word(struct sljit_compiler * compiler,sljit_gpr dst,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)968 static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
969 		sljit_s32 src, sljit_sw srcw,
970 		sljit_s32 is_32bit)
971 {
972 	struct addr addr;
973 	sljit_ins ins;
974 
975 	SLJIT_ASSERT(src & SLJIT_MEM);
976 	if (have_ldisp() || !is_32bit)
977 		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
978 	else
979 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
980 
981 	if (is_32bit)
982 		ins = WHEN(is_u12(addr.offset), dst, l, ly, addr);
983 	else
984 		ins = lg(dst, addr.offset, addr.index, addr.base);
985 
986 	return push_inst(compiler, ins);
987 }
988 
989 /* May clobber tmp1. */
store_word(struct sljit_compiler * compiler,sljit_gpr src,sljit_s32 dst,sljit_sw dstw,sljit_s32 is_32bit)990 static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
991 		sljit_s32 dst, sljit_sw dstw,
992 		sljit_s32 is_32bit)
993 {
994 	struct addr addr;
995 	sljit_ins ins;
996 
997 	SLJIT_ASSERT(dst & SLJIT_MEM);
998 	if (have_ldisp() || !is_32bit)
999 		FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
1000 	else
1001 		FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
1002 
1003 	if (is_32bit)
1004 		ins = WHEN(is_u12(addr.offset), src, st, sty, addr);
1005 	else
1006 		ins = stg(src, addr.offset, addr.index, addr.base);
1007 
1008 	return push_inst(compiler, ins);
1009 }
1010 
1011 #undef WHEN
1012 
/* Copy src (immediate, memory or register operand) into dst_r, honoring
   the 32/64-bit mode recorded in compiler->mode. */
static sljit_s32 emit_move(struct sljit_compiler *compiler,
	sljit_gpr dst_r,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 is_32bit = (compiler->mode & SLJIT_I32_OP) != 0;

	/* dst_r must not alias a register source */
	SLJIT_ASSERT(!SLOW_IS_REG(src) || dst_r != gpr(src & REG_MASK));

	if (src & SLJIT_IMM)
		return push_load_imm_inst(compiler, dst_r, srcw);

	if (src & SLJIT_MEM)
		return load_word(compiler, dst_r, src, srcw, is_32bit);

	/* plain register-to-register copy */
	sljit_gpr src_r = gpr(src & REG_MASK);
	return push_inst(compiler, is_32bit ? lr(dst_r, src_r) : lgr(dst_r, src_r));
}
1028 
/* Emit an RR-form operation: dst = dst OP src2, loading src1 into the
   destination first when needed. May clobber tmp0/tmp1. */
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr op_r = tmp0;
	sljit_gpr arg_r = tmp1;
	/* 0: dst already holds src1; 1: load src1 into op_r;
	   2: additionally compute in tmp0 and copy back at the end */
	sljit_s32 copy_mode = 1;

	if (SLOW_IS_REG(dst)) {
		op_r = gpr(dst & REG_MASK);

		if (dst == src1)
			copy_mode = 0;
		else if (dst == src2) {
			/* dst aliases src2: evaluate in tmp0 to keep src2 intact */
			op_r = tmp0;
			copy_mode = 2;
		}
	}

	if (copy_mode != 0)
		FAIL_IF(emit_move(compiler, op_r, src1, src1w));

	if (FAST_IS_REG(src2))
		arg_r = gpr(src2 & REG_MASK);
	else
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));

	FAIL_IF(push_inst(compiler, ins | (op_r << 4) | arg_r));

	if (copy_mode != 2)
		return SLJIT_SUCCESS;

	/* move the result from tmp0 into the real destination */
	op_r = gpr(dst & REG_MASK);
	return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(op_r, tmp0) : lgr(op_r, tmp0));
}
1065 
/* Emit an RRF-form (three distinct register operands) operation:
   dst = src1 OP src2. May clobber tmp0/tmp1. */
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src1_r = FAST_IS_REG(src1) ? gpr(src1 & REG_MASK) : tmp0;
	sljit_gpr src2_r = FAST_IS_REG(src2) ? gpr(src2 & REG_MASK) : tmp1;

	/* materialize non-register operands into the temporaries */
	if (!FAST_IS_REG(src1))
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (!FAST_IS_REG(src2))
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));

	return push_inst(compiler, ins | (dst_r << 4) | src1_r | (src2_r << 12));
}
1087 
/* Immediate-operand instruction families: RI has a 16-bit immediate,
   RIL a 32-bit immediate. */
typedef enum {
	RI_A,
	RIL_A,
} emit_ril_type;
1092 
/* Emit an RI- or RIL-form operation with an immediate second operand:
   dst = src1 OP src2w. May clobber tmp0/tmp1. */
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w,
	emit_ril_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 reuse_dst = 0; /* non-zero when dst already contains src1 */

	if (SLOW_IS_REG(dst)) {
		dst_r = gpr(dst & REG_MASK);
		reuse_dst = (dst == src1);
	}

	if (!reuse_dst)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	/* RIL: 32-bit immediate field; RI: 16-bit immediate field */
	if (type == RIL_A)
		return push_inst(compiler, ins | (dst_r << 36) | (src2w & 0xffffffff));
	return push_inst(compiler, ins | (dst_r << 20) | (src2w & 0xffff));
}
1116 
/* Emit an RIE-d form operation: dst = src1 OP (16-bit immediate).
   May clobber tmp0/tmp1. */
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w)
{
	sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src_r;

	if (SLOW_IS_REG(src1))
		src_r = gpr(src1 & REG_MASK);
	else {
		/* materialize src1 into tmp0 */
		src_r = tmp0;
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
	}

	return push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (src2w & 0xffff) << 16);
}
1132 
/* Memory-operand instruction families: RX has an unsigned 12-bit
   displacement, RXY a signed 20-bit displacement. */
typedef enum {
	RX_A,
	RXY_A,
} emit_rx_type;
1137 
/* Emit an RX- or RXY-form operation: dst = src1 OP [src2 memory operand].
   May clobber tmp0/tmp1. */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr dst_r = tmp0;
	/* 0: dst already holds src1; 1: load src1 into dst_r;
	   2: compute in tmp0 and copy into dst at the end */
	sljit_s32 needs_move = 1;
	sljit_gpr base, index;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (SLOW_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
			/* dst is part of the address: keep the result in tmp0
			   so the address registers stay intact */
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	base = gpr(src2 & REG_MASK);
	index = tmp0; /* tmp0 is r0, i.e. "no index" in the encoding */

	if (src2 & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(src2));

		if (src2w != 0) {
			/* src2w holds a shift amount (0-3): scale the index into tmp1 */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
			src2w = 0;
			index = tmp1;
		}
	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
		/* displacement does not fit the encoding: materialize it in tmp1 */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

		if (src2 & REG_MASK)
			index = tmp1;
		else
			base = tmp1;
		src2w = 0;
	}

	if (type == RX_A)
		ins |= (dst_r << 20) | (index << 16) | (base << 12) | src2w;
	else
		ins |= (dst_r << 36) | (index << 32) | (base << 28) | disp_s20(src2w);

	FAIL_IF(push_inst(compiler, ins));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* copy the result from tmp0 into the real destination */
	dst_r = gpr(dst);
	return push_inst(compiler, (compiler->mode & SLJIT_I32_OP) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1198 
/* Emit an SIY-form operation (8-bit immediate srcw applied to the memory
   operand dst/dstw). SIY has no index field, so base+index addresses are
   folded into tmp1 first. May clobber tmp1.

   Fixes over the previous version:
   - when the offset register needs scaling, shift gpr(OFFS_REG(dst))
     (previously it shifted tmp1, which was uninitialized there);
   - the LA computing the effective address now includes the base
     register gpr(dst & REG_MASK) (previously it added tmp1 to itself
     and dropped the base). */
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw srcw)
{
	sljit_gpr dst_r = tmp1;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	if (dst & OFFS_REG_MASK) {
		sljit_gpr index = tmp1;

		if ((dstw & 0x3) == 0)
			index = gpr(OFFS_REG(dst));
		else
			/* scale the offset register into tmp1 (shift amount 0-3) */
			FAIL_IF(push_inst(compiler, sllg(tmp1, gpr(OFFS_REG(dst)), dstw & 0x3, 0)));

		/* fold base + (scaled) index into tmp1 */
		FAIL_IF(push_inst(compiler, la(tmp1, 0, index, gpr(dst & REG_MASK))));
		dstw = 0;
	}
	else if (!is_s20(dstw)) {
		/* displacement out of range: materialize it, then add the base */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));

		if (dst & REG_MASK)
			FAIL_IF(push_inst(compiler, la(tmp1, 0, tmp1, gpr(dst & REG_MASK))));

		dstw = 0;
	}
	else
		dst_r = gpr(dst & REG_MASK);

	return push_inst(compiler, ins | ((srcw & 0xff) << 32) | (dst_r << 28) | disp_s20(dstw));
}
1231 
/* The different encodings available for one logical operation; a zero
   entry means the form does not exist for that operation. */
struct ins_forms {
	sljit_ins op_r;   /* 32-bit two-operand register form */
	sljit_ins op_gr;  /* 64-bit two-operand register form */
	sljit_ins op_rk;  /* 32-bit three-operand register form */
	sljit_ins op_grk; /* 64-bit three-operand register form */
	sljit_ins op;     /* 32-bit RX form (12-bit displacement) */
	sljit_ins op_y;   /* 32-bit RXY form (20-bit displacement) */
	sljit_ins op_g;   /* 64-bit RXY form (20-bit displacement) */
};
1241 
/* Emit a commutative binary operation, picking the cheapest available
   encoding from forms; memory operands may be swapped freely. */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* memory forms: 12-bit (RX) vs 20-bit (RXY) displacement */
		if (mode & SLJIT_I32_OP) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			if (src1 & SLJIT_MEM) {
				/* operands swapped: the operation is commutative */
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			emit_rx_type rx_type;

			/* only one memory form exists: use it for whichever
			   operand is in memory */
			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	/* register forms: two-operand (ins) vs three-operand (ins_k) */
	if (mode & SLJIT_I32_OP) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	/* prefer the shorter two-operand form when dst aliases a source */
	if (ins && SLOW_IS_REG(dst)) {
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
1324 
/* Emit a non-commutative binary operation; unlike emit_commutative the
   operand order must be preserved, so only src2 may use a memory form. */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins;

	if (src2 & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* memory forms: 12-bit (RX) vs 20-bit (RXY) displacement */
		if (mode & SLJIT_I32_OP) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* pick whichever displacement form fits the offset */
			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
				return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
		}
		else if (ins12)
			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
		else if (ins20)
			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
	}

	ins = (mode & SLJIT_I32_OP) ? forms->op_rk : forms->op_grk;

	/* fall back to the two-operand form when no three-operand form
	   exists or dst already holds src1 */
	if (ins == 0 || (SLOW_IS_REG(dst) && dst == src1))
		return emit_rr(compiler, (mode & SLJIT_I32_OP) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
}
1364 
/* Assemble the buffered instruction stream into executable memory.
   Two passes: the first measures code and literal-pool sizes and fixes
   label offsets; the second emits instructions, resolving constants,
   jumps and put_labels against the literal pool appended after the
   code. Memory layout: [code][pad to 8 bytes][literal pool]. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0;
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;

	sljit_uw source;
	sljit_sw offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				/* tagged instruction: reserve a pool slot */
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			if (label && label->size == j) {
				/* convert instruction index to byte offset */
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   bras %r14, %r1 (or bcr <mask>, %r1) */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	/* all labels/jumps/put_labels must have been visited */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and asser()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_uw)code_ptr;
				offset = (sljit_uw)pool_ptr - source;
				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));
					encode_inst(&code_ptr,
						lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = target;

					/* branch to tmp1 */
					sljit_ins op = (ins >> 32) & 0xf;
					sljit_ins arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	/* both passes must agree on the sizes */
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1587 
sljit_has_cpu_feature(sljit_s32 feature_type)1588 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1589 {
1590 	/* TODO(mundaym): implement all */
1591 	switch (feature_type) {
1592 	case SLJIT_HAS_CLZ:
1593 		return have_eimm() ? 1 : 0; /* FLOGR instruction */
1594 	case SLJIT_HAS_CMOV:
1595 		return have_lscond1() ? 1 : 0;
1596 	case SLJIT_HAS_FPU:
1597 		return 0;
1598 	}
1599 	return 0;
1600 }
1601 
1602 /* --------------------------------------------------------------------- */
1603 /*  Entry, exit                                                          */
1604 /* --------------------------------------------------------------------- */
1605 
/* Emit the function prologue: save r6-r15 into the register save area,
   allocate the stack frame and move incoming argument registers into
   the saved registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args = get_arg_count(arg_types);
	sljit_sw frame_size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* saved registers go in callee allocated save area */
	compiler->local_size = (local_size + 0xf) & ~0xf; /* round up to 16 bytes */
	frame_size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE;

	FAIL_IF(push_inst(compiler, stmg(r6, r15, r6 * sizeof(sljit_sw), r15))); /* save registers TODO(MGM): optimize */
	if (frame_size != 0) {
		/* pick the shortest instruction able to subtract frame_size from r15 */
		if (is_s16(-frame_size))
			FAIL_IF(push_inst(compiler, aghi(r15, -((sljit_s16)frame_size))));
		else if (is_s32(-frame_size))
			FAIL_IF(push_inst(compiler, agfi(r15, -((sljit_s32)frame_size))));
		else {
			FAIL_IF(push_load_imm_inst(compiler, tmp1, -frame_size));
			FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15)));
		}
	}

	/* move incoming arguments into the saved registers */
	if (args >= 1)
		FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0), gpr(SLJIT_R0))));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S1), gpr(SLJIT_R1))));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S2), gpr(SLJIT_R2))));
	SLJIT_ASSERT(args < 4);

	return SLJIT_SUCCESS;
}
1643 
/* Record the function context (no code is emitted); mirrors the local
   size computation performed by sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* TODO(mundaym): stack space for saved floating point registers */
	compiler->local_size = (local_size + 0xf) & ~0xf; /* round up to 16 bytes */
	return SLJIT_SUCCESS;
}
1656 
/* Emit the function epilogue: place the return value, restore the
   saved registers and branch to the return address in r14. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_sw size;
	sljit_gpr end;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	/* offset of the r6 save slot relative to the current r15 */
	size = compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + (r6 * sizeof(sljit_sw));
	if (!is_s20(size)) {
		/* save area out of LMG displacement range: restore r15 first,
		   then reload the remaining registers from a small offset */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, compiler->local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE));
		FAIL_IF(push_inst(compiler, la(r15, 0, tmp1, r15)));
		size = r6 * sizeof(sljit_sw);
		end = r14; /* r15 has been restored already */
	}
	else
		end = r15;

	FAIL_IF(push_inst(compiler, lmg(r6, end, size, r15))); /* restore registers TODO(MGM): optimize */
	FAIL_IF(push_inst(compiler, br(r14))); /* return */

	return SLJIT_SUCCESS;
}
1682 
1683 /* --------------------------------------------------------------------- */
1684 /*  Operators                                                            */
1685 /* --------------------------------------------------------------------- */
1686 
/* Emit a zero-operand operation (breakpoint, nop, and the implicit
   R0/R1 multiply/divide helpers). May clobber tmp0/tmp1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_gpr arg0 = gpr(SLJIT_R0);
	sljit_gpr arg1 = gpr(SLJIT_R1);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op) | (op & SLJIT_I32_OP);
	switch (op) {
	case SLJIT_BREAKPOINT:
		/* The following invalid instruction is emitted by gdb. */
		return push_inst(compiler, 0x0001 /* 2-byte trap */);
	case SLJIT_NOP:
		return push_inst(compiler, 0x0700 /* 2-byte nop */);
	case SLJIT_LMUL_UW:
		/* MLGR leaves the 128-bit product in the R0/R1 pair */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
		break;
	case SLJIT_LMUL_SW:
		/* signed multiplication from: */
		/* Hacker's Delight, Second Edition: Chapter 8-3. */
		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

		/* unsigned multiplication */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

		/* correct the high half for the signs of the operands */
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
		break;
	case SLJIT_DIV_U32:
	case SLJIT_DIVMOD_U32:
		/* DLR divides the tmp0:tmp1 pair; zero-extend the dividend */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_U32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_S32:
	case SLJIT_DIVMOD_S32:
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_S32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_UW:
	case SLJIT_DIVMOD_UW:
		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_UW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_SW:
	case SLJIT_DIVMOD_SW:
		/* DSGR sign-extends internally: no need to clear tmp0 */
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_SW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		return SLJIT_SUCCESS;
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	default:
		SLJIT_UNREACHABLE();
	}
	/* swap result registers */
	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
	return push_inst(compiler, lgr(arg1, tmp0));
}
1770 
/* LEVAL will be defined later with different parameters as needed */
/* Choose between two LEVAL-expanded instruction forms based on cond. */
#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
1773 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1774 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1775         sljit_s32 dst, sljit_sw dstw,
1776         sljit_s32 src, sljit_sw srcw)
1777 {
1778 	sljit_ins ins;
1779 	struct addr mem;
1780 	sljit_gpr dst_r;
1781 	sljit_gpr src_r;
1782 	sljit_s32 opcode = GET_OPCODE(op);
1783 
1784 	CHECK_ERROR();
1785 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1786 	ADJUST_LOCAL_OFFSET(dst, dstw);
1787 	ADJUST_LOCAL_OFFSET(src, srcw);
1788 
1789 	if ((dst == SLJIT_UNUSED) && !HAS_FLAGS(op)) {
1790 		/* TODO(carenas): implement prefetch? */
1791 		return SLJIT_SUCCESS;
1792 	}
1793 
1794 	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
1795 		/* LOAD REGISTER */
1796 		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
1797 			dst_r = gpr(dst);
1798 			src_r = gpr(src);
1799 			switch (opcode | (op & SLJIT_I32_OP)) {
1800 			/* 32-bit */
1801 			case SLJIT_MOV32_U8:
1802 				ins = llcr(dst_r, src_r);
1803 				break;
1804 			case SLJIT_MOV32_S8:
1805 				ins = lbr(dst_r, src_r);
1806 				break;
1807 			case SLJIT_MOV32_U16:
1808 				ins = llhr(dst_r, src_r);
1809 				break;
1810 			case SLJIT_MOV32_S16:
1811 				ins = lhr(dst_r, src_r);
1812 				break;
1813 			case SLJIT_MOV32:
1814 				ins = lr(dst_r, src_r);
1815 				break;
1816 			/* 64-bit */
1817 			case SLJIT_MOV_U8:
1818 				ins = llgcr(dst_r, src_r);
1819 				break;
1820 			case SLJIT_MOV_S8:
1821 				ins = lgbr(dst_r, src_r);
1822 				break;
1823 			case SLJIT_MOV_U16:
1824 				ins = llghr(dst_r, src_r);
1825 				break;
1826 			case SLJIT_MOV_S16:
1827 				ins = lghr(dst_r, src_r);
1828 				break;
1829 			case SLJIT_MOV_U32:
1830 				ins = llgfr(dst_r, src_r);
1831 				break;
1832 			case SLJIT_MOV_S32:
1833 				ins = lgfr(dst_r, src_r);
1834 				break;
1835 			case SLJIT_MOV:
1836 			case SLJIT_MOV_P:
1837 				ins = lgr(dst_r, src_r);
1838 				break;
1839 			default:
1840 				ins = 0;
1841 				SLJIT_UNREACHABLE();
1842 			}
1843 			FAIL_IF(push_inst(compiler, ins));
1844 			return SLJIT_SUCCESS;
1845 		}
1846 		/* LOAD IMMEDIATE */
1847 		if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
1848 			switch (opcode) {
1849 			case SLJIT_MOV_U8:
1850 				srcw = (sljit_sw)((sljit_u8)(srcw));
1851 				break;
1852 			case SLJIT_MOV_S8:
1853 				srcw = (sljit_sw)((sljit_s8)(srcw));
1854 				break;
1855 			case SLJIT_MOV_U16:
1856 				srcw = (sljit_sw)((sljit_u16)(srcw));
1857 				break;
1858 			case SLJIT_MOV_S16:
1859 				srcw = (sljit_sw)((sljit_s16)(srcw));
1860 				break;
1861 			case SLJIT_MOV_U32:
1862 				srcw = (sljit_sw)((sljit_u32)(srcw));
1863 				break;
1864 			case SLJIT_MOV_S32:
1865 				srcw = (sljit_sw)((sljit_s32)(srcw));
1866 				break;
1867 			}
1868 			return push_load_imm_inst(compiler, gpr(dst), srcw);
1869 		}
1870 		/* LOAD */
1871 		/* TODO(carenas): avoid reg being defined later */
1872 		#define LEVAL(i) EVAL(i, reg, mem)
1873 		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
1874 			sljit_gpr reg = gpr(dst);
1875 
1876 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
1877 			/* TODO(carenas): convert all calls below to LEVAL */
1878 			switch (opcode | (op & SLJIT_I32_OP)) {
1879 			case SLJIT_MOV32_U8:
1880 				ins = llc(reg, mem.offset, mem.index, mem.base);
1881 				break;
1882 			case SLJIT_MOV32_S8:
1883 				ins = lb(reg, mem.offset, mem.index, mem.base);
1884 				break;
1885 			case SLJIT_MOV32_U16:
1886 				ins = llh(reg, mem.offset, mem.index, mem.base);
1887 				break;
1888 			case SLJIT_MOV32_S16:
1889 				ins = WHEN2(is_u12(mem.offset), lh, lhy);
1890 				break;
1891 			case SLJIT_MOV32:
1892 				ins = WHEN2(is_u12(mem.offset), l, ly);
1893 				break;
1894 			case SLJIT_MOV_U8:
1895 				ins = LEVAL(llgc);
1896 				break;
1897 			case SLJIT_MOV_S8:
1898 				ins = lgb(reg, mem.offset, mem.index, mem.base);
1899 				break;
1900 			case SLJIT_MOV_U16:
1901 				ins = LEVAL(llgh);
1902 				break;
1903 			case SLJIT_MOV_S16:
1904 				ins = lgh(reg, mem.offset, mem.index, mem.base);
1905 				break;
1906 			case SLJIT_MOV_U32:
1907 				ins = LEVAL(llgf);
1908 				break;
1909 			case SLJIT_MOV_S32:
1910 				ins = lgf(reg, mem.offset, mem.index, mem.base);
1911 				break;
1912 			case SLJIT_MOV_P:
1913 			case SLJIT_MOV:
1914 				ins = lg(reg, mem.offset, mem.index, mem.base);
1915 				break;
1916 			default:
1917 				SLJIT_UNREACHABLE();
1918 			}
1919 			FAIL_IF(push_inst(compiler, ins));
1920 			return SLJIT_SUCCESS;
1921 		}
1922 		/* STORE and STORE IMMEDIATE */
1923 		if ((dst & SLJIT_MEM)
1924 			&& (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
1925 			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
1926 			if (src & SLJIT_IMM) {
1927 				/* TODO(mundaym): MOVE IMMEDIATE? */
1928 				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
1929 			}
1930 			struct addr mem;
1931 			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1932 			switch (opcode) {
1933 			case SLJIT_MOV_U8:
1934 			case SLJIT_MOV_S8:
1935 				return push_inst(compiler,
1936 					WHEN2(is_u12(mem.offset), stc, stcy));
1937 			case SLJIT_MOV_U16:
1938 			case SLJIT_MOV_S16:
1939 				return push_inst(compiler,
1940 					WHEN2(is_u12(mem.offset), sth, sthy));
1941 			case SLJIT_MOV_U32:
1942 			case SLJIT_MOV_S32:
1943 				return push_inst(compiler,
1944 					WHEN2(is_u12(mem.offset), st, sty));
1945 			case SLJIT_MOV_P:
1946 			case SLJIT_MOV:
1947 				FAIL_IF(push_inst(compiler, LEVAL(stg)));
1948 				return SLJIT_SUCCESS;
1949 			default:
1950 				SLJIT_UNREACHABLE();
1951 			}
1952 		}
1953 		#undef LEVAL
1954 		/* MOVE CHARACTERS */
1955 		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
1956 			struct addr mem;
1957 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
1958 			switch (opcode) {
1959 			case SLJIT_MOV_U8:
1960 			case SLJIT_MOV_S8:
1961 				FAIL_IF(push_inst(compiler,
1962 					EVAL(llgc, tmp0, mem)));
1963 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1964 				return push_inst(compiler,
1965 					EVAL(stcy, tmp0, mem));
1966 			case SLJIT_MOV_U16:
1967 			case SLJIT_MOV_S16:
1968 				FAIL_IF(push_inst(compiler,
1969 					EVAL(llgh, tmp0, mem)));
1970 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1971 				return push_inst(compiler,
1972 					EVAL(sthy, tmp0, mem));
1973 			case SLJIT_MOV_U32:
1974 			case SLJIT_MOV_S32:
1975 				FAIL_IF(push_inst(compiler,
1976 					EVAL(ly, tmp0, mem)));
1977 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1978 				return push_inst(compiler,
1979 					EVAL(sty, tmp0, mem));
1980 			case SLJIT_MOV_P:
1981 			case SLJIT_MOV:
1982 				FAIL_IF(push_inst(compiler,
1983 					EVAL(lg, tmp0, mem)));
1984 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
1985 				FAIL_IF(push_inst(compiler,
1986 					EVAL(stg, tmp0, mem)));
1987 				return SLJIT_SUCCESS;
1988 			default:
1989 				SLJIT_UNREACHABLE();
1990 			}
1991 		}
1992 		SLJIT_UNREACHABLE();
1993 	}
1994 
1995 	SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
1996 
1997 	dst_r = SLOW_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
1998 	src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
1999 	if (src & SLJIT_MEM)
2000 		FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_I32_OP));
2001 
2002 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2003 
2004 	/* TODO(mundaym): optimize loads and stores */
2005 	switch (opcode | (op & SLJIT_I32_OP)) {
2006 	case SLJIT_NOT:
2007 		/* emulate ~x with x^-1 */
2008 		FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2009 		if (src_r != dst_r)
2010 			FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
2011 
2012 		FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
2013 		break;
2014 	case SLJIT_NOT32:
2015 		/* emulate ~x with x^-1 */
2016 		if (have_eimm())
2017 			FAIL_IF(push_inst(compiler, xilf(dst_r, -1)));
2018 		else {
2019 			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2020 			if (src_r != dst_r)
2021 				FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
2022 
2023 			FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
2024 		}
2025 		break;
2026 	case SLJIT_NEG:
2027 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB;
2028 		FAIL_IF(push_inst(compiler, lcgr(dst_r, src_r)));
2029 		break;
2030 	case SLJIT_NEG32:
2031 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB;
2032 		FAIL_IF(push_inst(compiler, lcr(dst_r, src_r)));
2033 		break;
2034 	case SLJIT_CLZ:
2035 		if (have_eimm()) {
2036 			FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
2037 			if (dst_r != tmp0)
2038 				FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
2039 		} else {
2040 			abort(); /* TODO(mundaym): no eimm (?) */
2041 		}
2042 		break;
2043 	case SLJIT_CLZ32:
2044 		if (have_eimm()) {
2045 			FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
2046 			FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
2047 			FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
2048 			if (dst_r != tmp0)
2049 				FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2050 		} else {
2051 			abort(); /* TODO(mundaym): no eimm (?) */
2052 		}
2053 		break;
2054 	default:
2055 		SLJIT_UNREACHABLE();
2056 	}
2057 
2058 	if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
2059 		FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2060 
2061 	/* TODO(carenas): doesn't need FAIL_IF */
2062 	if ((dst != SLJIT_UNUSED) && (dst & SLJIT_MEM))
2063 		FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP));
2064 
2065 	return SLJIT_SUCCESS;
2066 }
2067 
is_commutative(sljit_s32 op)2068 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2069 {
2070 	switch (GET_OPCODE(op)) {
2071 	case SLJIT_ADD:
2072 	case SLJIT_ADDC:
2073 	case SLJIT_MUL:
2074 	case SLJIT_AND:
2075 	case SLJIT_OR:
2076 	case SLJIT_XOR:
2077 		return 1;
2078 	}
2079 	return 0;
2080 }
2081 
is_shift(sljit_s32 op)2082 static SLJIT_INLINE int is_shift(sljit_s32 op) {
2083 	sljit_s32 v = GET_OPCODE(op);
2084 	return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
2085 }
2086 
sets_signed_flag(sljit_s32 op)2087 static SLJIT_INLINE int sets_signed_flag(sljit_s32 op)
2088 {
2089 	switch (GET_FLAG_TYPE(op)) {
2090 	case SLJIT_OVERFLOW:
2091 	case SLJIT_NOT_OVERFLOW:
2092 	case SLJIT_SIG_LESS:
2093 	case SLJIT_SIG_LESS_EQUAL:
2094 	case SLJIT_SIG_GREATER:
2095 	case SLJIT_SIG_GREATER_EQUAL:
2096 		return 1;
2097 	}
2098 	return 0;
2099 }
2100 
/* Signed add encodings in each instruction format the emitter can choose
   from; field order follows struct ins_forms (declared earlier in file). */
static const struct ins_forms add_forms = {
	0x1a00, /* ar */
	0xb9080000, /* agr */
	0xb9f80000, /* ark */
	0xb9e80000, /* agrk */
	0x5a000000, /* a */
	0xe3000000005a, /* ay */
	0xe30000000008, /* ag */
};

/* Logical (unsigned) add encodings; selected by sljit_emit_add when
   SLJIT_SET_OVERFLOW is not requested. */
static const struct ins_forms logical_add_forms = {
	0x1e00, /* alr */
	0xb90a0000, /* algr */
	0xb9fa0000, /* alrk */
	0xb9ea0000, /* algrk */
	0x5e000000, /* al */
	0xe3000000005e, /* aly */
	0xe3000000000a, /* alg */
};
2120 
/* Emit an addition, preferring immediate forms when src2 is a constant:
   add-to-storage (asi/agsi family) when the destination is the same memory
   operand, 16-bit immediate (ahik family), then 32-bit immediate forms,
   falling back to the generic commutative emitter. */
static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	const struct ins_forms *forms;
	sljit_ins ins;

	if (src2 & SLJIT_IMM) {
		/* In-place add of a small immediate directly to memory
		   (only when dst aliases src1 exactly). */
		if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
			if (sets_overflow)
				ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
			else
				ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
			return emit_siy(compiler, ins, dst, dstw, src2w);
		}

		/* 16-bit signed immediate: three-operand add-immediate. */
		if (is_s16(src2w)) {
			if (sets_overflow)
				ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
			else
				ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
			FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
			goto done;
		}

		if (!sets_overflow) {
			/* Logical adds take an unsigned 32-bit immediate;
			   a negative immediate becomes a logical subtract. */
			if ((op & SLJIT_I32_OP) || is_u32(src2w)) {
				ins = (op & SLJIT_I32_OP) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}
			if (is_u32(-src2w)) {
				FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
				goto done;
			}
		}
		else if ((op & SLJIT_I32_OP) || is_s32(src2w)) {
			/* Signed 32-bit immediate add. */
			ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
			goto done;
		}
	}

	forms = sets_overflow ? &add_forms : &logical_add_forms;
	FAIL_IF(emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w));

done:
	if (sets_zero_overflow)
		FAIL_IF(update_zero_overflow(compiler, op, SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));

	/* Results destined for memory are produced in tmp0 by the emitters. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP);

	return SLJIT_SUCCESS;
}
2179 
/* Signed subtract encodings; field order follows struct ins_forms. */
static const struct ins_forms sub_forms = {
	0x1b00, /* sr */
	0xb9090000, /* sgr */
	0xb9f90000, /* srk */
	0xb9e90000, /* sgrk */
	0x5b000000, /* s */
	0xe3000000005b, /* sy */
	0xe30000000009, /* sg */
};

/* Logical (unsigned) subtract encodings; selected by sljit_emit_sub when
   signed flags are not requested. */
static const struct ins_forms logical_sub_forms = {
	0x1f00, /* slr */
	0xb90b0000, /* slgr */
	0xb9fb0000, /* slrk */
	0xb9eb0000, /* slgrk */
	0x5f000000, /* sl */
	0xe3000000005f, /* sly */
	0xe3000000000b, /* slg */
};
2199 
/* Emit a subtraction. A flag-only subtract (dst unused, comparison flag
   requested) is lowered to a compare instruction instead; otherwise
   immediate forms are tried (mostly by adding the negated immediate)
   before falling back to the generic non-commutative emitter. */
static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	int sets_signed = sets_signed_flag(op);
	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	const struct ins_forms *forms;
	sljit_ins ins;

	/* Pure comparison: no result is kept, only condition flags. */
	if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
		int compare_signed = GET_FLAG_TYPE(op) >= SLJIT_SIG_LESS;

		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;

		if (src2 & SLJIT_IMM) {
			if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
			{
				/* Signed compare immediate. */
				if ((op & SLJIT_I32_OP) || is_s32(src2w)) {
					ins = (op & SLJIT_I32_OP) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
			}
			else {
				/* Logical compare immediate. */
				if ((op & SLJIT_I32_OP) || is_u32(src2w)) {
					ins = (op & SLJIT_I32_OP) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
				if (is_s16(src2w))
					return emit_rie_d(compiler, 0xec00000000db /* alghsik */, SLJIT_UNUSED, src1, src1w, src2w);
			}
		}
		else if (src2 & SLJIT_MEM) {
			/* Compare against memory operand. */
			if ((op & SLJIT_I32_OP) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
				ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
				return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
			}

			if (compare_signed)
				ins = (op & SLJIT_I32_OP) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
			else
				ins = (op & SLJIT_I32_OP) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
			return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
		}

		/* Register-register compare. */
		if (compare_signed)
			ins = (op & SLJIT_I32_OP) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
		else
			ins = (op & SLJIT_I32_OP) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
		return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
	}

	if (src2 & SLJIT_IMM) {
		/* Subtracting an immediate is mostly done by adding -src2w. */
		sljit_sw neg_src2w = -src2w;

		if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
			/* In-place add of the negated small immediate to memory. */
			if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
				if (sets_signed)
					ins = (op & SLJIT_I32_OP) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
				else
					ins = (op & SLJIT_I32_OP) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
				return emit_siy(compiler, ins, dst, dstw, neg_src2w);
			}

			if (is_s16(neg_src2w)) {
				if (sets_signed)
					ins = (op & SLJIT_I32_OP) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
				else
					ins = (op & SLJIT_I32_OP) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
				FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
				goto done;
			}
		}

		if (!sets_signed) {
			/* Logical subtract immediate, or logical add of -src2w. */
			if ((op & SLJIT_I32_OP) || is_u32(src2w)) {
				ins = (op & SLJIT_I32_OP) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}
			if (is_u32(neg_src2w)) {
				FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
				goto done;
			}
		}
		else if ((op & SLJIT_I32_OP) || is_s32(neg_src2w)) {
			/* Signed add of the negated 32-bit immediate. */
			ins = (op & SLJIT_I32_OP) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
			goto done;
		}
	}

	forms = sets_signed ? &sub_forms : &logical_sub_forms;
	FAIL_IF(emit_non_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w));

done:
	if (sets_signed) {
		sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

		if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
			/* In case of overflow, the sign bit of the two source operands must be different, and
			     - the first operand is greater if the sign bit of the result is set
			     - the first operand is less if the sign bit of the result is not set
			   The -result operation sets the correct sign, because the result cannot be zero.
			   The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
			FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2)));
			FAIL_IF(push_inst(compiler, (op & SLJIT_I32_OP) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
		}
		else if (op & SLJIT_SET_Z)
			FAIL_IF(update_zero_overflow(compiler, op, dst_r));
	}

	/* Results destined for memory are produced in tmp0 by the emitters. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP);

	return SLJIT_SUCCESS;
}
2317 
/* Multiply encodings (no flag support); field order per struct ins_forms. */
static const struct ins_forms multiply_forms = {
	0xb2520000, /* msr */
	0xb90c0000, /* msgr */
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0x71000000, /* ms */
	0xe30000000051, /* msy */
	0xe3000000000c, /* msg */
};

/* Overflow-setting multiply encodings (misc-instruction-extensions-2
   facility); zero entries mark formats with no such instruction. */
static const struct ins_forms multiply_overflow_forms = {
	0,
	0,
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0,
	0xe30000000053, /* msc */
	0xe30000000083, /* msgc */
};
2337 
/* Emit a multiplication. When flags are requested, the overflow-setting
   forms are used (requires the misc-instruction-extensions-2 facility;
   see the emulation sketch below for CPUs without it). Otherwise small
   immediates use mhi/msfi style encodings. */
static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_ins ins;

	if (HAS_FLAGS(op)) {
		/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
		FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
		FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
		if (dst_r != tmp0) {
			FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
		}
		FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
		FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
		FAIL_IF(push_inst(compiler, ipm(flag_r)));
		FAIL_IF(push_inst(compiler, oilh(flag_r, 0x2000))); */

		return emit_commutative(compiler, &multiply_overflow_forms, dst, dstw, src1, src1w, src2, src2w);
	}

	if (src2 & SLJIT_IMM) {
		/* 16-bit immediate multiply. */
		if (is_s16(src2w)) {
			ins = (op & SLJIT_I32_OP) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
		}

		/* 32-bit immediate multiply. */
		if (is_s32(src2w)) {
			ins = (op & SLJIT_I32_OP) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
		}
	}

	return emit_commutative(compiler, &multiply_forms, dst, dstw, src1, src1w, src2, src2w);
}
2374 
/* Emit AND/OR/XOR with an immediate using the fewest halfword/word
   immediate instructions. 'count16' is the number of non-zero 16-bit
   chunks of 'imm' (precomputed by the caller). The value is first moved
   into the destination register, then modified in place. Only valid when
   the zero flag is not requested (partial-immediate sequences would set
   the condition code from the last chunk only). */
static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_uw imm, sljit_s32 count16)
{
	sljit_s32 mode = compiler->mode;
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;

	if (SLOW_IS_REG(dst)) {
		dst_r = gpr(dst & REG_MASK);
		/* Operating in place: the source is already in dst_r. */
		if (dst == src1)
			needs_move = 0;
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (type == SLJIT_AND) {
		/* AND always needs both word halves cleared as requested; in
		   64-bit mode the high word must be masked too. */
		if (!(mode & SLJIT_I32_OP))
			FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | (dst_r << 36) | (imm >> 32)));
		return push_inst(compiler, 0xc00b00000000 /* nilf */ | (dst_r << 36) | (imm & 0xffffffff));
	}
	else if (type == SLJIT_OR) {
		/* Two 32-bit OR-immediates beat four halfword ones. */
		if (count16 >= 3) {
			FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32)));
			return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff));
		}

		/* Two chunks in the same word half: one 32-bit OR suffices. */
		if (count16 >= 2) {
			if ((imm & 0x00000000ffffffffull) == 0)
				return push_inst(compiler, 0xc00c00000000 /* oihf */ | (dst_r << 36) | (imm >> 32));
			if ((imm & 0xffffffff00000000ull) == 0)
				return push_inst(compiler, 0xc00d00000000 /* oilf */ | (dst_r << 36) | (imm & 0xffffffff));
		}

		/* One halfword OR per non-zero chunk (at most two here);
		   an all-zero immediate still emits a no-op oill. */
		if ((imm & 0xffff000000000000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | (dst_r << 20) | (imm >> 48)));
		if ((imm & 0x0000ffff00000000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | (dst_r << 20) | ((imm >> 32) & 0xffff)));
		if ((imm & 0x00000000ffff0000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | (dst_r << 20) | ((imm >> 16) & 0xffff)));
		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
			return push_inst(compiler, 0xa50b0000 /* oill */ | (dst_r << 20) | (imm & 0xffff));
		return SLJIT_SUCCESS;
	}

	/* XOR: one 32-bit XOR-immediate per non-zero word half. */
	if ((imm & 0xffffffff00000000ull) != 0)
		FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | (dst_r << 36) | (imm >> 32)));
	if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
		return push_inst(compiler, 0xc00700000000 /* xilf */ | (dst_r << 36) | (imm & 0xffffffff));
	return SLJIT_SUCCESS;
}
2428 
/* Bitwise AND encodings; field order per struct ins_forms. */
static const struct ins_forms bitwise_and_forms = {
	0x1400, /* nr */
	0xb9800000, /* ngr */
	0xb9f40000, /* nrk */
	0xb9e40000, /* ngrk */
	0x54000000, /* n */
	0xe30000000054, /* ny */
	0xe30000000080, /* ng */
};

/* Bitwise OR encodings; field order per struct ins_forms. */
static const struct ins_forms bitwise_or_forms = {
	0x1600, /* or */
	0xb9810000, /* ogr */
	0xb9f60000, /* ork */
	0xb9e60000, /* ogrk */
	0x56000000, /* o */
	0xe30000000056, /* oy */
	0xe30000000081, /* og */
};

/* Bitwise XOR encodings; field order per struct ins_forms. */
static const struct ins_forms bitwise_xor_forms = {
	0x1700, /* xr */
	0xb9820000, /* xgr */
	0xb9f70000, /* xrk */
	0xb9e70000, /* xgrk */
	0x57000000, /* x */
	0xe30000000057, /* xy */
	0xe30000000082, /* xg */
};
2458 
/* Emit AND/OR/XOR. Immediates are lowered to halfword/word immediate
   instructions when the zero flag is not required; a flag-only AND with a
   single non-zero halfword uses a test-under-mask instruction. Otherwise
   the generic commutative emitter handles it. */
static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 type = GET_OPCODE(op);
	const struct ins_forms *forms;

	if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == SLJIT_UNUSED))) {
		sljit_s32 count16 = 0;
		sljit_uw imm = (sljit_uw)src2w;

		if (op & SLJIT_I32_OP)
			imm &= 0xffffffffull;

		/* Count the non-zero 16-bit chunks of the immediate (an
		   all-zero immediate still counts as one). */
		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
			count16++;
		if ((imm & 0x00000000ffff0000ull) != 0)
			count16++;
		if ((imm & 0x0000ffff00000000ull) != 0)
			count16++;
		if ((imm & 0xffff000000000000ull) != 0)
			count16++;

		/* Flag-only AND with one active halfword: emit a single
		   test-under-mask (tmll/tmlh/tmhl/tmhh) instruction. */
		if (type == SLJIT_AND && dst == SLJIT_UNUSED && count16 == 1) {
			sljit_gpr src_r = tmp0;

			if (FAST_IS_REG(src1))
				src_r = gpr(src1 & REG_MASK);
			else
				FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

			if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
				return push_inst(compiler, 0xa7010000 | (src_r << 20) | imm);
			if ((imm & 0x00000000ffff0000ull) != 0)
				return push_inst(compiler, 0xa7000000 | (src_r << 20) | (imm >> 16));
			if ((imm & 0x0000ffff00000000ull) != 0)
				return push_inst(compiler, 0xa7030000 | (src_r << 20) | (imm >> 32));
			return push_inst(compiler, 0xa7020000 | (src_r << 20) | (imm >> 48));
		}

		/* Immediate forms are only safe when the Z flag is not
		   requested (multi-instruction sequences set it piecemeal). */
		if (!(op & SLJIT_SET_Z))
			return sljit_emit_bitwise_imm(compiler, type, dst, dstw, src1, src1w, imm, count16);
	}

	if (type == SLJIT_AND)
		forms = &bitwise_and_forms;
	else if (type == SLJIT_OR)
		forms = &bitwise_or_forms;
	else
		forms = &bitwise_xor_forms;

	return emit_commutative(compiler, forms, dst, dstw, src1, src1w, src2, src2w);
}
2513 
/* Emit SHL/LSHR/ASHR. Uses the two-operand RS forms (sll/srl/sra) when a
   32-bit shift is done in place, otherwise the three-operand RSY forms
   (sllk/sllg etc.). Shift amounts come from an immediate (masked to the
   operand width) or from a register. */
static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 type = GET_OPCODE(op);
	sljit_gpr dst_r = SLOW_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src_r = tmp0;
	sljit_gpr base_r = tmp0;
	sljit_ins imm = 0;
	sljit_ins ins;

	if (FAST_IS_REG(src1))
		src_r = gpr(src1 & REG_MASK);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (src2 & SLJIT_IMM)
		/* Mask the amount to the operand width (5 or 6 bits). */
		imm = src2w & ((op & SLJIT_I32_OP) ? 0x1f : 0x3f);
	else if (FAST_IS_REG(src2))
		base_r = gpr(src2 & REG_MASK);
	else {
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
		base_r = tmp1;
	}

	if ((op & SLJIT_I32_OP) && dst_r == src_r) {
		/* In-place 32-bit shift: two-operand RS form. */
		if (type == SLJIT_SHL)
			ins = 0x89000000 /* sll */;
		else if (type == SLJIT_LSHR)
			ins = 0x88000000 /* srl */;
		else
			ins = 0x8a000000 /* sra */;

		FAIL_IF(push_inst(compiler, ins | (dst_r << 20) | (base_r << 12) | imm));
	}
	else {
		/* Three-operand RSY form (distinct destination or 64-bit). */
		if (type == SLJIT_SHL)
			ins = (op & SLJIT_I32_OP) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
		else if (type == SLJIT_LSHR)
			ins = (op & SLJIT_I32_OP) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
		else
			ins = (op & SLJIT_I32_OP) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;

		FAIL_IF(push_inst(compiler, ins | (dst_r << 36) | (src_r << 32) | (base_r << 28) | (imm << 16)));
	}

	/* Logical shifts do not set the condition code; materialize the
	   zero flag with a self-OR (sra already sets it). */
	if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
		return push_inst(compiler, (op & SLJIT_I32_OP) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));

	return SLJIT_SUCCESS;
}
2566 
/* Add-with-carry encodings; zero entries mark formats that do not exist
   for this operation (field order per struct ins_forms). */
static const struct ins_forms addc_forms = {
	0xb9980000, /* alcr */
	0xb9880000, /* alcgr */
	0,
	0,
	0,
	0xe30000000098, /* alc */
	0xe30000000088, /* alcg */
};

/* Subtract-with-borrow encodings; zero entries as above. */
static const struct ins_forms subc_forms = {
	0xb9990000, /* slbr */
	0xb9890000, /* slbgr */
	0,
	0,
	0,
	0xe30000000099, /* slb */
	0xe30000000089, /* slbg */
};
2586 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2587 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2588 	sljit_s32 dst, sljit_sw dstw,
2589 	sljit_s32 src1, sljit_sw src1w,
2590 	sljit_s32 src2, sljit_sw src2w)
2591 {
2592 	CHECK_ERROR();
2593 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2594 	ADJUST_LOCAL_OFFSET(dst, dstw);
2595 	ADJUST_LOCAL_OFFSET(src1, src1w);
2596 	ADJUST_LOCAL_OFFSET(src2, src2w);
2597 
2598 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
2599 		return SLJIT_SUCCESS;
2600 
2601 	compiler->mode = op & SLJIT_I32_OP;
2602 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2603 
2604 	if (GET_OPCODE(op) >= SLJIT_ADD || GET_OPCODE(op) <= SLJIT_SUBC)
2605 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD_SUB;
2606 
2607 	if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
2608 		src1 ^= src2;
2609 		src2 ^= src1;
2610 		src1 ^= src2;
2611 
2612 		src1w ^= src2w;
2613 		src2w ^= src1w;
2614 		src1w ^= src2w;
2615 	}
2616 
2617 	switch (GET_OPCODE(op)) {
2618 	case SLJIT_ADD:
2619 		return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2620 	case SLJIT_ADDC:
2621 		FAIL_IF(emit_commutative(compiler, &addc_forms, dst, dstw, src1, src1w, src2, src2w));
2622 		if (dst & SLJIT_MEM)
2623 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP);
2624 		return SLJIT_SUCCESS;
2625 	case SLJIT_SUB:
2626 		return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2627 	case SLJIT_SUBC:
2628 		FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, dstw, src1, src1w, src2, src2w));
2629 		if (dst & SLJIT_MEM)
2630 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP);
2631 		return SLJIT_SUCCESS;
2632 	case SLJIT_MUL:
2633 		FAIL_IF(sljit_emit_multiply(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2634 		break;
2635 	case SLJIT_AND:
2636 	case SLJIT_OR:
2637 	case SLJIT_XOR:
2638 		FAIL_IF(sljit_emit_bitwise(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2639 		break;
2640 	case SLJIT_SHL:
2641 	case SLJIT_LSHR:
2642 	case SLJIT_ASHR:
2643 		FAIL_IF(sljit_emit_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2644 		break;
2645 	}
2646 
2647 	if (dst & SLJIT_MEM)
2648 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_I32_OP);
2649 	return SLJIT_SUCCESS;
2650 }
2651 
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2652 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
2653 	struct sljit_compiler *compiler,
2654 	sljit_s32 op, sljit_s32 src, sljit_sw srcw)
2655 {
2656 	sljit_gpr src_r;
2657 
2658 	CHECK_ERROR();
2659 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2660 	ADJUST_LOCAL_OFFSET(src, srcw);
2661 
2662 	switch (op) {
2663 	case SLJIT_FAST_RETURN:
2664 		src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2665 		if (src & SLJIT_MEM)
2666 			FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
2667 
2668 		return push_inst(compiler, br(src_r));
2669 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2670 		/* TODO(carenas): implement? */
2671 		return SLJIT_SUCCESS;
2672 	case SLJIT_PREFETCH_L1:
2673 	case SLJIT_PREFETCH_L2:
2674 	case SLJIT_PREFETCH_L3:
2675 	case SLJIT_PREFETCH_ONCE:
2676 		/* TODO(carenas): implement */
2677 		return SLJIT_SUCCESS;
2678 	default:
2679                 /* TODO(carenas): probably should not success by default */
2680 		return SLJIT_SUCCESS;
2681 	}
2682 
2683 	return SLJIT_SUCCESS;
2684 }
2685 
/* Maps an abstract SLJIT register to its hardware GPR number. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return gpr(reg);
}
2691 
/* Maps an abstract SLJIT float register to its hardware FPR number.
   Floating point support is not implemented on this target yet. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	abort();
}
2697 
/* Emit a raw machine instruction supplied by the caller. The 'size' bytes
   are right-aligned inside the sljit_ins buffer, i.e. placed at the end of
   the 8-byte value, which is where push_inst expects the encoding
   (NOTE(review): assumes big-endian byte order, which holds on s390x). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
	return push_inst(compiler, ins);
}
2709 
2710 /* --------------------------------------------------------------------- */
2711 /*  Floating point operators                                             */
2712 /* --------------------------------------------------------------------- */
2713 
/* Single-operand floating-point ops: not implemented on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	abort();
}
2721 
/* Two-operand floating-point ops: not implemented on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	abort();
}
2730 
2731 /* --------------------------------------------------------------------- */
2732 /*  Other instructions                                                   */
2733 /* --------------------------------------------------------------------- */
2734 
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)2735 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
2736 {
2737 	CHECK_ERROR();
2738 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
2739 	ADJUST_LOCAL_OFFSET(dst, dstw);
2740 
2741 	if (FAST_IS_REG(dst))
2742 		return push_inst(compiler, lgr(gpr(dst), fast_link_r));
2743 
2744 	/* memory */
2745 	return store_word(compiler, fast_link_r, dst, dstw, 0);
2746 }
2747 
2748 /* --------------------------------------------------------------------- */
2749 /*  Conditional instructions                                             */
2750 /* --------------------------------------------------------------------- */
2751 
sljit_emit_label(struct sljit_compiler * compiler)2752 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2753 {
2754 	struct sljit_label *label;
2755 
2756 	CHECK_ERROR_PTR();
2757 	CHECK_PTR(check_sljit_emit_label(compiler));
2758 
2759 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2760 		return compiler->last_label;
2761 
2762 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2763 	PTR_FAIL_IF(!label);
2764 	set_label(label, compiler);
2765 	return label;
2766 }
2767 
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2768 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2769 {
2770 	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
2771 
2772 	CHECK_ERROR_PTR();
2773 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2774 
2775 	/* record jump */
2776 	struct sljit_jump *jump = (struct sljit_jump *)
2777 		ensure_abuf(compiler, sizeof(struct sljit_jump));
2778 	PTR_FAIL_IF(!jump);
2779 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2780 	jump->addr = compiler->size;
2781 
2782 	/* emit jump instruction */
2783 	type &= 0xff;
2784 	if (type >= SLJIT_FAST_CALL)
2785 		PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
2786 	else
2787 		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
2788 
2789 	return jump;
2790 }
2791 
/* Emits a call. arg_types is not consumed here: no argument-marshalling
   code is emitted, so arguments are presumably already in their ABI
   locations (TODO confirm against the sljit calling convention). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* Arguments were validated above; avoid re-checking in emit_jump. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
}
2805 
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2806 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2807 {
2808 	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2809 
2810 	CHECK_ERROR();
2811 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2812 	ADJUST_LOCAL_OFFSET(src, srcw);
2813 
2814 	if (src & SLJIT_IMM) {
2815 		SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
2816 		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
2817 	}
2818 	else if (src & SLJIT_MEM)
2819 		FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
2820 
2821 	/* emit jump instruction */
2822 	if (type >= SLJIT_FAST_CALL)
2823 		return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));
2824 
2825 	return push_inst(compiler, br(src_r));
2826 }
2827 
/* Emits an indirect call. Like sljit_emit_call, arg_types is not
   consumed: no marshalling code is emitted here. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* Arguments were validated above; avoid re-checking in emit_ijump. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
}
2842 
/* Materializes condition `type` as 0/1 into dst; for the AND/OR/XOR
   variants the flag value is combined with the current dst contents. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 mask = get_cc(compiler, type & 0xff);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr loc_r = tmp1; /* scratch that receives the 0/1 flag value */
	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		compiler->status_flags_state = op & SLJIT_SET_Z;

		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP));

		break;
	case SLJIT_MOV:
	case (SLJIT_MOV32 & ~SLJIT_I32_OP):
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* Build the flag: load 0 unconditionally, then conditionally
	   overwrite it with 1 (load-halfword-immediate-on-condition). */
	/* TODO(mundaym): fold into cmov helper function? */
	#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, lochi, locghi)));
	} else {
		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
		abort();
	}
	#undef LEVAL

	/* apply bitwise op and set condition codes */
	/* (no case matches for SLJIT_MOV/MOV32: loc_r already aliases dst_r) */
	switch (GET_OPCODE(op)) {
	#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_I32_OP, xr, xgr)));
		break;
	#undef LEVAL
	}

	/* store result to memory if required */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_I32_OP);

	return SLJIT_SUCCESS;
}
2910 
/* Conditionally moves src into dst_reg when condition `type` holds,
   using LOAD ON CONDITION (locr/locgr) when the facility is present. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 mask = get_cc(compiler, type & 0xff);
	sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_I32_OP);
	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

	/* Immediates are first materialized in the scratch register. */
	if (src & SLJIT_IMM) {
		/* TODO(mundaym): fast path with lscond2 */
		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
	}

	/* LEVAL supplies the operands; WHEN2 picks the 32/64-bit mnemonic. */
	#define LEVAL(i) i(dst_r, src_r, mask)
	if (have_lscond1())
		return push_inst(compiler,
			WHEN2(dst_reg & SLJIT_I32_OP, locr, locgr));

	#undef LEVAL

	/* TODO(mundaym): implement */
	return SLJIT_ERR_UNSUPPORTED;
}
2937 
2938 /* --------------------------------------------------------------------- */
2939 /*  Other instructions                                                   */
2940 /* --------------------------------------------------------------------- */
2941 
2942 /* On s390x we build a literal pool to hold constants. This has two main
2943    advantages:
2944 
2945      1. we only need one instruction in the instruction stream (LGRL)
2946      2. we can store 64 bit addresses and use 32 bit offsets
2947 
2948    To retrofit the extra information needed to build the literal pool we
2949    add a new sljit_s390x_const struct that contains the initial value but
2950    can still be cast to a sljit_const. */
2951 
/* Emits a patchable 64-bit constant load. The emitted load is tagged
   with sljit_ins_const (in the high halfword) so a later pass can bind
   it to a literal-pool slot holding init_value (see comment above). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_s390x_const *const_;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));

	/* Extended const record keeps the initial value for the pool. */
	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
					sizeof(struct sljit_s390x_const));
	PTR_FAIL_IF(!const_);
	set_const((struct sljit_const*)const_, compiler);
	const_->init_value = init_value;

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	if (have_genext())
		/* Single PC-relative load; the 0 offset is patched later. */
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
	else {
		/* Fallback: load the pool address, then load the value. */
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));

	return (struct sljit_const*)const_;
}
2979 
/* Rewrites a jump target by storing new_target into the 64-bit
   constant-pool word at `addr` (not into an instruction). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	/* Update the constant pool. */
	sljit_uw *ptr = (sljit_uw *)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	/* Temporarily make the word writable, update it, then restore
	   executable permissions and flush the data/instruction caches. */
	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	*ptr = new_target;
	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	SLJIT_CACHE_FLUSH(ptr, ptr + 1);
}
2991 
/* Constants share the literal-pool patching mechanism with jumps. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, new_constant, executable_offset);
}
2996 
sljit_emit_put_label(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)2997 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
2998 	struct sljit_compiler *compiler,
2999 	sljit_s32 dst, sljit_sw dstw)
3000 {
3001 	struct sljit_put_label *put_label;
3002 	sljit_gpr dst_r;
3003 
3004 	CHECK_ERROR_PTR();
3005 	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3006 	ADJUST_LOCAL_OFFSET(dst, dstw);
3007 
3008 	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3009 	PTR_FAIL_IF(!put_label);
3010 	set_put_label(put_label, compiler, 0);
3011 
3012 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3013 
3014 	if (have_genext())
3015 		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
3016 	else {
3017 		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
3018 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
3019 	}
3020 
3021 	if (dst & SLJIT_MEM)
3022 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
3023 
3024 	return put_label;
3025 }
3026 
3027 /* TODO(carenas): EVAL probably should move up or be refactored */
3028 #undef WHEN2
3029 #undef EVAL
3030 
3031 #undef tmp1
3032 #undef tmp0
3033 
3034 /* TODO(carenas): undef other macros that spill like is_u12? */
3035