1#define M64	67108864
2
3#define aB1	(1<<1)
4#define aB2	(1<<2)
5#define aB3	(1<<3)
6#define aB4	(1<<4)
7#define aB5	(1<<5)
8#define aB6	(1<<6)
9#define aB7	(1<<7)
10#define aB8	(1<<8)
11#define aB9	(1<<9)
12#define aB10	(1<<10)
13#define aB11	(1<<11)
14#define aB12	(1<<12)
15#define aB13	(1<<13)
16#define aB14	(1<<14)
17#define aB15	(1<<15)
18#define aB16	(1<<16)
19#define aB17	(1<<17)
20#define aB18	(1<<18)
21#define aB19	(1<<19)
22#define aB20	(1<<20)
23#define aB21	(1<<21)
24#define aB22	(1<<22)
25#define aB23	(1<<23)
26#define aB24	(1<<24)
27#define aB25	(1<<25)
28#define aB26	(1<<26)
29#define bB1	(-aB1)
30#define bB2	(-aB2)
31#define bB3	(-aB3)
32#define bB4	(-aB4)
33#define bB5	(-aB5)
34#define bB6	(-aB6)
35#define bB7	(-aB7)
36#define bB8	(-aB8)
37#define bB9	(-aB9)
38#define bB10	(-aB10)
39#define bB11	(-aB11)
40#define bB12	(-aB12)
41#define bB13	(-aB13)
42#define bB14	(-aB14)
43#define bB15	(-aB15)
44#define bB16	(-aB16)
45#define bB17	(-aB17)
46#define bB18	(-aB18)
47#define bB19	(-aB19)
48#define bB20	(-aB20)
49#define bB21	(-aB21)
50#define bB22	(-aB22)
51#define bB23	(-aB23)
52#define bB24	(-aB24)
53#define bB25	(-aB25)
54#define bB26	(-aB26)
55#define cB1	(aB1-1)
56#define cB2	(aB2-1)
57#define cB3	(aB3-1)
58#define cB4	(aB4-1)
59#define cB5	(aB5-1)
60#define cB6	(aB6-1)
61#define cB7	(aB7-1)
62#define cB8	(aB8-1)
63#define cB9	(aB9-1)
64#define cB10	(aB10-1)
65#define cB11	(aB11-1)
66#define cB12	(aB12-1)
67#define cB13	(aB13-1)
68#define cB14	(aB14-1)
69#define cB15	(aB15-1)
70#define cB16	(aB16-1)
71#define cB17	(aB17-1)
72#define cB18	(aB18-1)
73#define cB19	(aB19-1)
74#define cB20	(aB20-1)
75#define cB21	(aB21-1)
76#define cB22	(aB22-1)
77#define cB23	(aB23-1)
78#define cB24	(aB24-1)
79#define cB25	(aB25-1)
80#define cB26	(aB26-1)
81#define dB1	(-aB1+1)
82#define dB2	(-aB2+1)
83#define dB3	(-aB3+1)
84#define dB4	(-aB4+1)
85#define dB5	(-aB5+1)
86#define dB6	(-aB6+1)
87#define dB7	(-aB7+1)
88#define dB8	(-aB8+1)
89#define dB9	(-aB9+1)
90#define dB10	(-aB10+1)
91#define dB11	(-aB11+1)
92#define dB12	(-aB12+1)
93#define dB13	(-aB13+1)
94#define dB14	(-aB14+1)
95#define dB15	(-aB15+1)
96#define dB16	(-aB16+1)
97#define dB17	(-aB17+1)
98#define dB18	(-aB18+1)
99#define dB19	(-aB19+1)
100#define dB20	(-aB20+1)
101#define dB21	(-aB21+1)
102#define dB22	(-aB22+1)
103#define dB23	(-aB23+1)
104#define dB24	(-aB24+1)
105#define dB25	(-aB25+1)
106#define dB26	(-aB26+1)
107
108#define add(a, b)		$(a + b)
109#define sub(a, b)		$(a - b)
110#define rsb(a, b)		$(b - a)
111#define mul(a, b)		$(a * b)
112#define div(a, b)		$(a / b)
113#define rem(a, b)		$(a % b)
114#define and(a, b)		$(a & b)
115#define or(a, b)		$(a | b)
116#define xor(a, b)		$(a ^ b)
117
118#define alu2(N, X, L, R, V)					\
119	movi %r1 L						\
120	movi %r2 R						\
121	N##r %r0 %r1 %r2					\
122	beqi X %r0 V						\
123	calli @abort						\
124X:
125#define alu1(N, M)						\
126	alu2(N, N##M##1, 3, $(M##1), N(3, M##1))		\
127	alu2(N, N##M##2, 3, $(M##2), N(3, M##2))		\
128	alu2(N, N##M##3, 3, $(M##3), N(3, M##3))		\
129	alu2(N, N##M##4, 3, $(M##4), N(3, M##4))		\
130	alu2(N, N##M##5, 3, $(M##5), N(3, M##5))		\
131	alu2(N, N##M##6, 3, $(M##6), N(3, M##6))		\
132	alu2(N, N##M##7, 3, $(M##7), N(3, M##7))		\
133	alu2(N, N##M##8, 3, $(M##8), N(3, M##8))		\
134	alu2(N, N##M##9, 3, $(M##9), N(3, M##9))		\
135	alu2(N, N##M##10, 3, $(M##10), N(3, M##10))		\
136	alu2(N, N##M##11, 3, $(M##11), N(3, M##11))		\
137	alu2(N, N##M##12, 3, $(M##12), N(3, M##12))		\
138	alu2(N, N##M##13, 3, $(M##13), N(3, M##13))		\
139	alu2(N, N##M##14, 3, $(M##14), N(3, M##14))		\
140	alu2(N, N##M##15, 3, $(M##15), N(3, M##15))		\
141	alu2(N, N##M##16, 3, $(M##16), N(3, M##16))		\
142	alu2(N, N##M##17, 3, $(M##17), N(3, M##17))		\
143	alu2(N, N##M##18, 3, $(M##18), N(3, M##18))		\
144	alu2(N, N##M##19, 3, $(M##19), N(3, M##19))		\
145	alu2(N, N##M##20, 3, $(M##20), N(3, M##20))		\
146	alu2(N, N##M##21, 3, $(M##21), N(3, M##21))		\
147	alu2(N, N##M##22, 3, $(M##22), N(3, M##22))		\
148	alu2(N, N##M##23, 3, $(M##23), N(3, M##23))		\
149	alu2(N, N##M##24, 3, $(M##24), N(3, M##24))		\
150	alu2(N, N##M##25, 3, $(M##25), N(3, M##25))		\
151	alu2(N, N##M##26, 3, $(M##26), N(3, M##26))
152
153#define alu(N)							\
154	alu1(N, aB)						\
155	alu1(N, bB)						\
156	alu1(N, cB)						\
157	alu1(N, dB)
158
159#define _lsh(N)							\
160	alu2(lsh, L##N, 3, N, $(3<<N))
161#define _rsh(N)							\
162	alu2(rsh, R##N, $(1<<63), N, $((1<<63)>>N))
163
164#if __WORDSIZE == 32
165#  define xsh64(X)			/**/
166#else
167#  define xsh64(X)						\
168	_##X##sh(32)						\
169	_##X##sh(33)						\
170	_##X##sh(34)						\
171	_##X##sh(35)						\
172	_##X##sh(36)						\
173	_##X##sh(37)						\
174	_##X##sh(38)						\
175	_##X##sh(39)						\
176	_##X##sh(40)						\
177	_##X##sh(41)						\
178	_##X##sh(42)						\
179	_##X##sh(43)						\
180	_##X##sh(44)						\
181	_##X##sh(45)						\
182	_##X##sh(46)						\
183	_##X##sh(47)						\
184	_##X##sh(48)						\
185	_##X##sh(49)						\
186	_##X##sh(50)						\
187	_##X##sh(51)						\
188	_##X##sh(52)						\
189	_##X##sh(53)						\
190	_##X##sh(54)						\
191	_##X##sh(55)						\
192	_##X##sh(56)						\
193	_##X##sh(57)						\
194	_##X##sh(58)						\
195	_##X##sh(59)						\
196	_##X##sh(60)						\
197	_##X##sh(61)						\
198	_##X##sh(62)						\
199	_##X##sh(63)
200#endif
201
202#define xsh(X)							\
203	_##X##sh(0)						\
204	_##X##sh(1)						\
205	_##X##sh(2)						\
206	_##X##sh(3)						\
207	_##X##sh(4)						\
208	_##X##sh(5)						\
209	_##X##sh(6)						\
210	_##X##sh(7)						\
211	_##X##sh(8)						\
212	_##X##sh(9)						\
213	_##X##sh(10)						\
214	_##X##sh(11)						\
215	_##X##sh(12)						\
216	_##X##sh(13)						\
217	_##X##sh(14)						\
218	_##X##sh(15)						\
219	_##X##sh(16)						\
220	_##X##sh(17)						\
221	_##X##sh(18)						\
222	_##X##sh(19)						\
223	_##X##sh(20)						\
224	_##X##sh(21)						\
225	_##X##sh(22)						\
226	_##X##sh(23)						\
227	_##X##sh(24)						\
228	_##X##sh(25)						\
229	_##X##sh(26)						\
230	_##X##sh(27)						\
231	_##X##sh(28)						\
232	_##X##sh(29)						\
233	_##X##sh(30)						\
234	_##X##sh(31)						\
235	xsh64(X)
236
237#define lsh()							\
238	xsh(l)
239#define rsh()							\
240	xsh(r)
241
242#define reset(V)						\
243	prepare							\
244		pushargi buf					\
245		pushargi V					\
246		pushargi $(M64 + 8)				\
247	finishi @memset
248
249#define stx(T, N, O, V)						\
250	movi %r0 V						\
251	movi %r1 O						\
252	stxr##T %r1 %v0 %r0
253#define stx8(T, M, V)						\
254	stx(T, 3, $(M##B3), V)					\
255	stx(T, 4, $(M##B4), V)					\
256	stx(T, 5, $(M##B5), V)					\
257	stx(T, 6, $(M##B6), V)					\
258	stx(T, 7, $(M##B7), V)					\
259	stx(T, 8, $(M##B8), V)					\
260	stx(T, 9, $(M##B9), V)					\
261	stx(T, 10, $(M##B10), V)				\
262	stx(T, 11, $(M##B11), V)				\
263	stx(T, 12, $(M##B12), V)				\
264	stx(T, 13, $(M##B13), V)				\
265	stx(T, 14, $(M##B14), V)				\
266	stx(T, 15, $(M##B15), V)				\
267	stx(T, 16, $(M##B16), V)				\
268	stx(T, 17, $(M##B17), V)				\
269	stx(T, 18, $(M##B18), V)				\
270	stx(T, 19, $(M##B19), V)				\
271	stx(T, 20, $(M##B20), V)				\
272	stx(T, 21, $(M##B21), V)				\
273	stx(T, 22, $(M##B22), V)				\
274	stx(T, 23, $(M##B23), V)				\
275	stx(T, 24, $(M##B24), V)				\
276	stx(T, 25, $(M##B25), V)				\
277	stx(T, 26, $(M##B26), V)
278#define stx4(T, M, V)						\
279	stx(T, 2, $(M##B2), V)					\
280	stx8(T, M, V)
281#define stx2(T, M, V)						\
282	stx(T, 1, $(M##B1), V)					\
283	stx4(T, M, V)
284#define ldx(T, N, M, O, V)					\
285	movi %r0 0						\
286	ldxi##T %r0 %v0 O					\
287	beqi ldx##T##N##M %r0 V					\
288	calli @abort						\
289ldx##T##N##M:
290#define ldx8(T, M, V)						\
291	ldx(T, 3, M, $(M##B3), V)				\
292	ldx(T, 4, M, $(M##B4), V)				\
293	ldx(T, 5, M, $(M##B5), V)				\
294	ldx(T, 6, M, $(M##B6), V)				\
295	ldx(T, 7, M, $(M##B7), V)				\
296	ldx(T, 8, M, $(M##B8), V)				\
297	ldx(T, 9, M, $(M##B9), V)				\
298	ldx(T, 10, M, $(M##B10), V)				\
299	ldx(T, 11, M, $(M##B11), V)				\
300	ldx(T, 12, M, $(M##B12), V)				\
301	ldx(T, 13, M, $(M##B13), V)				\
302	ldx(T, 14, M, $(M##B14), V)				\
303	ldx(T, 15, M, $(M##B15), V)				\
304	ldx(T, 16, M, $(M##B16), V)				\
305	ldx(T, 17, M, $(M##B17), V)				\
306	ldx(T, 18, M, $(M##B18), V)				\
307	ldx(T, 19, M, $(M##B19), V)				\
308	ldx(T, 20, M, $(M##B20), V)				\
309	ldx(T, 21, M, $(M##B21), V)				\
310	ldx(T, 22, M, $(M##B22), V)				\
311	ldx(T, 23, M, $(M##B23), V)				\
312	ldx(T, 24, M, $(M##B24), V)				\
313	ldx(T, 25, M, $(M##B25), V)				\
314	ldx(T, 26, M, $(M##B26), V)
315#define ldx4(T, M, V)						\
316	ldx(T, 2, M, $(M##B2), V)				\
317	ldx8(T, M, V)
318#define ldx2(T, M, V)						\
319	ldx(T, 1, M, $(M##B1), V)				\
320	ldx4(T, M, V)
321
322#define stf(T, N, O, V)						\
323	movi##T %f0 V						\
324	movi %r0 O						\
325	stxr##T %r0 %v0 %f0
326#define stf8(T, M, V)						\
327	stf(T, 3, $(M##B3), V)					\
328	stf(T, 4, $(M##B4), V)					\
329	stf(T, 5, $(M##B5), V)					\
330	stf(T, 6, $(M##B6), V)					\
331	stf(T, 7, $(M##B7), V)					\
332	stf(T, 8, $(M##B8), V)					\
333	stf(T, 9, $(M##B9), V)					\
334	stf(T, 10, $(M##B10), V)				\
335	stf(T, 11, $(M##B11), V)				\
336	stf(T, 12, $(M##B12), V)				\
337	stf(T, 13, $(M##B13), V)				\
338	stf(T, 14, $(M##B14), V)				\
339	stf(T, 15, $(M##B15), V)				\
340	stf(T, 16, $(M##B16), V)				\
341	stf(T, 17, $(M##B17), V)				\
342	stf(T, 18, $(M##B18), V)				\
343	stf(T, 19, $(M##B19), V)				\
344	stf(T, 20, $(M##B20), V)				\
345	stf(T, 21, $(M##B21), V)				\
346	stf(T, 22, $(M##B22), V)				\
347	stf(T, 23, $(M##B23), V)				\
348	stf(T, 24, $(M##B24), V)				\
349	stf(T, 25, $(M##B25), V)				\
350	stf(T, 26, $(M##B26), V)
351#define stf4(T, M, V)						\
352	stf(T, 2, $(M##B2), V)					\
353	stf8(T, M, V)
354#define ldf(T, N, M, O, V)					\
355	movi##T %f0 0						\
356	ldxi##T %f0 %v0 O					\
357	beqi##T ldf##T##N##M %f0 V				\
358	calli @abort						\
359ldf##T##N##M:
360#define ldf8(T, M, V)						\
361	ldf(T, 3, M, $(M##B3), V)				\
362	ldf(T, 4, M, $(M##B4), V)				\
363	ldf(T, 5, M, $(M##B5), V)				\
364	ldf(T, 6, M, $(M##B6), V)				\
365	ldf(T, 7, M, $(M##B7), V)				\
366	ldf(T, 8, M, $(M##B8), V)				\
367	ldf(T, 9, M, $(M##B9), V)				\
368	ldf(T, 10, M, $(M##B10), V)				\
369	ldf(T, 11, M, $(M##B11), V)				\
370	ldf(T, 12, M, $(M##B12), V)				\
371	ldf(T, 13, M, $(M##B13), V)				\
372	ldf(T, 14, M, $(M##B14), V)				\
373	ldf(T, 15, M, $(M##B15), V)				\
374	ldf(T, 16, M, $(M##B16), V)				\
375	ldf(T, 17, M, $(M##B17), V)				\
376	ldf(T, 18, M, $(M##B18), V)				\
377	ldf(T, 19, M, $(M##B19), V)				\
378	ldf(T, 20, M, $(M##B20), V)				\
379	ldf(T, 21, M, $(M##B21), V)				\
380	ldf(T, 22, M, $(M##B22), V)				\
381	ldf(T, 23, M, $(M##B23), V)				\
382	ldf(T, 24, M, $(M##B24), V)				\
383	ldf(T, 25, M, $(M##B25), V)				\
384	ldf(T, 26, M, $(M##B26), V)
385#define ldf4(T, M, V)						\
386	ldf(T, 2, M, $(M##B2), V)				\
387	ldf8(T, M, V)
388
389#define ldst_c()						\
390	reset(0xa5)						\
391	movi %v0 buf						\
392	stx2(_c, a, 0x5a)					\
393	ldx2(_c, a, 0x5a)					\
394	reset(0xa5)						\
395	movi %v0 $(buf + M64)					\
396	stx2(_c, b, 0x5a)					\
397	ldx2(_c, b, 0x5a)
398#define ldst_uc()						\
399	reset(0xa5)						\
400	movi %v0 buf						\
401	stx2(_c, a, 0x5a)					\
402	ldx2(_uc, a, 0x5a)					\
403	movi %v0 $(buf + M64)					\
404	stx2(_c, b, 0x5a)					\
405	ldx2(_uc, b, 0x5a)
406#define ldst_s()						\
407	reset(0xa5)						\
408	movi %v0 buf						\
409	stx2(_s, a, 0x5a5a)					\
410	ldx2(_s, a, 0x5a5a)					\
411	reset(0xa5)						\
412	movi %v0 $(buf + M64)					\
413	stx2(_s, b, 0x5a5a)					\
414	ldx2(_s, b, 0x5a5a)
415#define ldst_us()						\
416	reset(0xa5)						\
417	movi %v0 buf						\
418	stx2(_s, a, 0x5a5a)					\
419	ldx2(_us, a, 0x5a5a)					\
420	reset(0xa5)						\
421	movi %v0 $(buf + M64)					\
422	stx2(_s, b, 0x5a5a)					\
423	ldx2(_us, b, 0x5a5a)
424#define ldst_i()						\
425	reset(0xa5)						\
426	movi %v0 buf						\
427	stx4(_i, a, 0x5a5a5a5a)					\
428	ldx4(_i, a, 0x5a5a5a5a)					\
429	reset(0xa5)						\
430	movi %v0 $(buf + M64)					\
431	stx4(_i, b, 0x5a5a5a5a)					\
432	ldx4(_i, b, 0x5a5a5a5a)
433#define ldst_ui()						\
434	reset(0xa5)						\
435	movi %v0 buf						\
436	stx4(_i, a, 0x5a5a5a5a)					\
437	ldx4(_ui, a, 0x5a5a5a5a)				\
438	reset(0xa5)						\
439	movi %v0 $(buf + M64)					\
440	stx4(_i, b, 0x5a5a5a5a)					\
441	ldx4(_ui, b, 0x5a5a5a5a)
442#define ldst_l()						\
443	reset(0xa5)						\
444	movi %v0 buf						\
445	stx8(_l, a, 0x5a5a5a5a5a5a5a5a)				\
446	ldx8(_l, a, 0x5a5a5a5a5a5a5a5a)				\
447	reset(0xa5)						\
448	movi %v0 $(buf + M64)					\
449	stx8(_l, b, 0x5a5a5a5a5a5a5a5a)				\
450	ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
451#define ldst_f()						\
452	reset(0xa5)						\
453	movi %v0 buf						\
454	stf4(_f, a, 0.5)					\
455	ldf4(_f, a, 0.5)					\
456	reset(0xa5)						\
457	movi %v0 $(buf + M64)					\
458	stf4(_f, b, 0.5)					\
459	ldf4(_f, b, 0.5)
460#define ldst_d()						\
461	reset(0xa5)						\
462	movi %v0 buf						\
463	stf8(_d, a, 0.5)					\
464	ldf8(_d, a, 0.5)					\
465	reset(0xa5)						\
466	movi %v0 $(buf + M64)					\
467	stf8(_d, b, 0.5)					\
468	ldf8(_d, b, 0.5)
469
470.data		67112960
471buf:
472.size		M64
473.size		8
474ok:
475.c		"ok"
476
477.code
478	prolog
479
480	alu(add)
481	alu(sub)
482	alu(rsb)
483	alu(mul)
484	alu(div)
485	alu(rem)
486	lsh()
487	rsh()
488	alu(and)
489	alu(or)
490	alu(xor)
491	ldst_c()
492	ldst_uc()
493	ldst_s()
494	ldst_us()
495	ldst_i()
496#if __WORDSIZE == 64
497	ldst_ui()
498	ldst_l()
499#endif
500	ldst_f()
501	ldst_d()
502
503	prepare
504		pushargi ok
505	finishi @puts
506	ret
507	epilog
508