/*
 * M64: 67108864 == 1 << 26 (64 MiB).  Used below as the size of the main
 * part of the test buffer and as the distance between the two base
 * addresses (buf and buf + M64) used by the load/store tests.
 */
#define M64	67108864
2
/* aBn: the positive power of two 1 << n, for n = 1 .. 26. */
#define aB1	(1 << 1)
#define aB2	(1 << 2)
#define aB3	(1 << 3)
#define aB4	(1 << 4)
#define aB5	(1 << 5)
#define aB6	(1 << 6)
#define aB7	(1 << 7)
#define aB8	(1 << 8)
#define aB9	(1 << 9)
#define aB10	(1 << 10)
#define aB11	(1 << 11)
#define aB12	(1 << 12)
#define aB13	(1 << 13)
#define aB14	(1 << 14)
#define aB15	(1 << 15)
#define aB16	(1 << 16)
#define aB17	(1 << 17)
#define aB18	(1 << 18)
#define aB19	(1 << 19)
#define aB20	(1 << 20)
#define aB21	(1 << 21)
#define aB22	(1 << 22)
#define aB23	(1 << 23)
#define aB24	(1 << 24)
#define aB25	(1 << 25)
#define aB26	(1 << 26)
/* bBn: the negated power of two -(1 << n), for n = 1 .. 26. */
#define bB1	(-aB1)
#define bB2	(-aB2)
#define bB3	(-aB3)
#define bB4	(-aB4)
#define bB5	(-aB5)
#define bB6	(-aB6)
#define bB7	(-aB7)
#define bB8	(-aB8)
#define bB9	(-aB9)
#define bB10	(-aB10)
#define bB11	(-aB11)
#define bB12	(-aB12)
#define bB13	(-aB13)
#define bB14	(-aB14)
#define bB15	(-aB15)
#define bB16	(-aB16)
#define bB17	(-aB17)
#define bB18	(-aB18)
#define bB19	(-aB19)
#define bB20	(-aB20)
#define bB21	(-aB21)
#define bB22	(-aB22)
#define bB23	(-aB23)
#define bB24	(-aB24)
#define bB25	(-aB25)
#define bB26	(-aB26)
/* cBn: one below a power of two, (1 << n) - 1, for n = 1 .. 26. */
#define cB1	(aB1 - 1)
#define cB2	(aB2 - 1)
#define cB3	(aB3 - 1)
#define cB4	(aB4 - 1)
#define cB5	(aB5 - 1)
#define cB6	(aB6 - 1)
#define cB7	(aB7 - 1)
#define cB8	(aB8 - 1)
#define cB9	(aB9 - 1)
#define cB10	(aB10 - 1)
#define cB11	(aB11 - 1)
#define cB12	(aB12 - 1)
#define cB13	(aB13 - 1)
#define cB14	(aB14 - 1)
#define cB15	(aB15 - 1)
#define cB16	(aB16 - 1)
#define cB17	(aB17 - 1)
#define cB18	(aB18 - 1)
#define cB19	(aB19 - 1)
#define cB20	(aB20 - 1)
#define cB21	(aB21 - 1)
#define cB22	(aB22 - 1)
#define cB23	(aB23 - 1)
#define cB24	(aB24 - 1)
#define cB25	(aB25 - 1)
#define cB26	(aB26 - 1)
/* dBn: one above a negated power of two, 1 - (1 << n), for n = 1 .. 26. */
#define dB1	(1 - aB1)
#define dB2	(1 - aB2)
#define dB3	(1 - aB3)
#define dB4	(1 - aB4)
#define dB5	(1 - aB5)
#define dB6	(1 - aB6)
#define dB7	(1 - aB7)
#define dB8	(1 - aB8)
#define dB9	(1 - aB9)
#define dB10	(1 - aB10)
#define dB11	(1 - aB11)
#define dB12	(1 - aB12)
#define dB13	(1 - aB13)
#define dB14	(1 - aB14)
#define dB15	(1 - aB15)
#define dB16	(1 - aB16)
#define dB17	(1 - aB17)
#define dB18	(1 - aB18)
#define dB19	(1 - aB19)
#define dB20	(1 - aB20)
#define dB21	(1 - aB21)
#define dB22	(1 - aB22)
#define dB23	(1 - aB23)
#define dB24	(1 - aB24)
#define dB25	(1 - aB25)
#define dB26	(1 - aB26)
107
/*
 * Expected-value helpers.  Each one is named after the instruction it
 * mirrors so that alu1() can invoke it as N(3, constant); the result is a
 * $(...) expression giving the value the instruction is expected to produce.
 * rsb is the reversed subtraction: second operand minus first.
 */
#define add(lhs, rhs)		$(lhs + rhs)
#define sub(lhs, rhs)		$(lhs - rhs)
#define rsb(lhs, rhs)		$(rhs - lhs)
#define mul(lhs, rhs)		$(lhs * rhs)
#define div(lhs, rhs)		$(lhs / rhs)
#define rem(lhs, rhs)		$(lhs % rhs)
#define and(lhs, rhs)		$(lhs & rhs)
#define or(lhs, rhs)		$(lhs | rhs)
#define xor(lhs, rhs)		$(lhs ^ rhs)
117
/*
 * alu2(OP, DONE, LHS, RHS, WANT): one immediate-form ALU check.
 *   OP    instruction stem; the emitted instruction is OP##i
 *   DONE  unique label name, defined at the end of the expansion
 *   LHS   value loaded into %r1 as the register operand
 *   RHS   immediate operand
 *   WANT  value %r0 must hold afterwards
 * Branches to DONE when %r0 equals WANT, otherwise calls abort.
 */
#define alu2(OP, DONE, LHS, RHS, WANT)				\
	movi %r1 LHS						\
	OP##i %r0 %r1 RHS					\
	beqi DONE %r0 WANT					\
	calli @abort						\
DONE:
/*
 * alu_case(OP, SET, K): a single alu2() check of "3 OP SET##K", using the
 * label OP##SET##K and the expected value OP(3, SET##K).
 */
#define alu_case(OP, SET, K)					\
	alu2(OP, OP##SET##K, 3, $(SET##K), OP(3, SET##K))
/*
 * alu1(N, M): check instruction N with left operand 3 against each of the
 * 26 constants M##1 .. M##26 (M is one of aB, bB, cB, dB).
 */
#define alu1(N, M)						\
	alu_case(N, M, 1)					\
	alu_case(N, M, 2)					\
	alu_case(N, M, 3)					\
	alu_case(N, M, 4)					\
	alu_case(N, M, 5)					\
	alu_case(N, M, 6)					\
	alu_case(N, M, 7)					\
	alu_case(N, M, 8)					\
	alu_case(N, M, 9)					\
	alu_case(N, M, 10)					\
	alu_case(N, M, 11)					\
	alu_case(N, M, 12)					\
	alu_case(N, M, 13)					\
	alu_case(N, M, 14)					\
	alu_case(N, M, 15)					\
	alu_case(N, M, 16)					\
	alu_case(N, M, 17)					\
	alu_case(N, M, 18)					\
	alu_case(N, M, 19)					\
	alu_case(N, M, 20)					\
	alu_case(N, M, 21)					\
	alu_case(N, M, 22)					\
	alu_case(N, M, 23)					\
	alu_case(N, M, 24)					\
	alu_case(N, M, 25)					\
	alu_case(N, M, 26)
151
/*
 * alu(OP): run the alu1() sweep for instruction OP over all four constant
 * families, in the order aB, bB, cB, dB.
 */
#define alu(OP)							\
	alu1(OP, aB)						\
	alu1(OP, bB)						\
	alu1(OP, cB)						\
	alu1(OP, dB)
157
/*
 * _lsh(CNT): check that lshi of the value 3 by CNT bits yields 3 << CNT.
 * The check label is L##CNT.
 */
#define _lsh(CNT)						\
	alu2(lsh, L##CNT, 3, CNT, $(3<<CNT))
/*
 * _rsh(N): check that rshi of a word with only its top bit set, shifted
 * right by N bits, yields the same value as the $() expression evaluator
 * computes for that shift.  The check label is R##N.
 *
 * The top bit is selected per word size.  A fixed 1<<63 is out of range
 * for a 32-bit word, so on such targets the operand would not reliably be
 * the sign bit.  The test below is the same one this file already uses to
 * decide whether shift counts 32..63 are exercised.
 * NOTE(review): assumes $() expressions are evaluated with word-sized
 * signed arithmetic -- confirm against the test driver.
 */
#if __WORDSIZE == 32
#  define _rsh(N)						\
	alu2(rsh, R##N, $(1<<31), N, $((1<<31)>>N))
#else
#  define _rsh(N)						\
	alu2(rsh, R##N, $(1<<63), N, $((1<<63)>>N))
#endif
162
/*
 * xsh64(DIR): the shift checks for counts 32 .. 63, invoking _##DIR##sh
 * for each count.  Expands to nothing when __WORDSIZE is 32.
 */
#if __WORDSIZE != 32
#  define xsh64(DIR)						\
	_##DIR##sh(32)						\
	_##DIR##sh(33)						\
	_##DIR##sh(34)						\
	_##DIR##sh(35)						\
	_##DIR##sh(36)						\
	_##DIR##sh(37)						\
	_##DIR##sh(38)						\
	_##DIR##sh(39)						\
	_##DIR##sh(40)						\
	_##DIR##sh(41)						\
	_##DIR##sh(42)						\
	_##DIR##sh(43)						\
	_##DIR##sh(44)						\
	_##DIR##sh(45)						\
	_##DIR##sh(46)						\
	_##DIR##sh(47)						\
	_##DIR##sh(48)						\
	_##DIR##sh(49)						\
	_##DIR##sh(50)						\
	_##DIR##sh(51)						\
	_##DIR##sh(52)						\
	_##DIR##sh(53)						\
	_##DIR##sh(54)						\
	_##DIR##sh(55)						\
	_##DIR##sh(56)						\
	_##DIR##sh(57)						\
	_##DIR##sh(58)						\
	_##DIR##sh(59)						\
	_##DIR##sh(60)						\
	_##DIR##sh(61)						\
	_##DIR##sh(62)						\
	_##DIR##sh(63)
#else
#  define xsh64(DIR)			/**/
#endif
200
/*
 * xsh(DIR): invoke _##DIR##sh for every shift count 0 .. 31, then hand
 * over to xsh64(DIR) for the counts above 31.  DIR is l or r.
 */
#define xsh(DIR)						\
	_##DIR##sh(0)						\
	_##DIR##sh(1)						\
	_##DIR##sh(2)						\
	_##DIR##sh(3)						\
	_##DIR##sh(4)						\
	_##DIR##sh(5)						\
	_##DIR##sh(6)						\
	_##DIR##sh(7)						\
	_##DIR##sh(8)						\
	_##DIR##sh(9)						\
	_##DIR##sh(10)						\
	_##DIR##sh(11)						\
	_##DIR##sh(12)						\
	_##DIR##sh(13)						\
	_##DIR##sh(14)						\
	_##DIR##sh(15)						\
	_##DIR##sh(16)						\
	_##DIR##sh(17)						\
	_##DIR##sh(18)						\
	_##DIR##sh(19)						\
	_##DIR##sh(20)						\
	_##DIR##sh(21)						\
	_##DIR##sh(22)						\
	_##DIR##sh(23)						\
	_##DIR##sh(24)						\
	_##DIR##sh(25)						\
	_##DIR##sh(26)						\
	_##DIR##sh(27)						\
	_##DIR##sh(28)						\
	_##DIR##sh(29)						\
	_##DIR##sh(30)						\
	_##DIR##sh(31)						\
	xsh64(DIR)
235
/* lsh() / rsh(): the complete left-shift and right-shift sweeps. */
#define lsh()	xsh(l)
#define rsh()	xsh(r)
240
/*
 * reset(FILL): call memset(buf, FILL, M64 + 8), refilling the whole test
 * buffer with the byte FILL.
 */
#define reset(FILL)						\
	prepare							\
		pushargi buf					\
		pushargi FILL					\
		pushargi $(M64 + 8)				\
	finishi @memset
247
/*
 * stx(SFX, IDX, OFF, VAL): load VAL into %r0 and store it with stxi##SFX,
 * using OFF as the displacement operand and %v0 as the base register.
 * IDX is not used in the expansion; it only mirrors the ldx() argument
 * list.
 */
#define stx(SFX, IDX, OFF, VAL)					\
	movi %r0 VAL						\
	stxi##SFX OFF %v0 %r0
/* stx_at(SFX, SIDE, K, VAL): one stx() at displacement SIDE##B##K. */
#define stx_at(SFX, SIDE, K, VAL)				\
	stx(SFX, K, $(SIDE##B##K), VAL)
/*
 * stx8(T, M, V): store V with suffix T at each displacement
 * M##B3 .. M##B26 (M is a or b).
 */
#define stx8(T, M, V)						\
	stx_at(T, M, 3, V)					\
	stx_at(T, M, 4, V)					\
	stx_at(T, M, 5, V)					\
	stx_at(T, M, 6, V)					\
	stx_at(T, M, 7, V)					\
	stx_at(T, M, 8, V)					\
	stx_at(T, M, 9, V)					\
	stx_at(T, M, 10, V)					\
	stx_at(T, M, 11, V)					\
	stx_at(T, M, 12, V)					\
	stx_at(T, M, 13, V)					\
	stx_at(T, M, 14, V)					\
	stx_at(T, M, 15, V)					\
	stx_at(T, M, 16, V)					\
	stx_at(T, M, 17, V)					\
	stx_at(T, M, 18, V)					\
	stx_at(T, M, 19, V)					\
	stx_at(T, M, 20, V)					\
	stx_at(T, M, 21, V)					\
	stx_at(T, M, 22, V)					\
	stx_at(T, M, 23, V)					\
	stx_at(T, M, 24, V)					\
	stx_at(T, M, 25, V)					\
	stx_at(T, M, 26, V)
/* stx4(SFX, SIDE, VAL): the stx8() sweep preceded by displacement SIDE##B2. */
#define stx4(SFX, SIDE, VAL)					\
	stx(SFX, 2, $(SIDE##B2), VAL)				\
	stx8(SFX, SIDE, VAL)
/* stx2(SFX, SIDE, VAL): the stx4() sweep preceded by displacement SIDE##B1. */
#define stx2(SFX, SIDE, VAL)					\
	stx(SFX, 1, $(SIDE##B1), VAL)				\
	stx4(SFX, SIDE, VAL)
/*
 * ldx(SFX, IDX, SIDE, OFF, WANT): clear %r0, load into it with ldxi##SFX
 * from base %v0 and displacement OFF, then branch to the label
 * ldx##SFX##IDX##SIDE when %r0 equals WANT; otherwise call abort.
 * IDX and SIDE serve only to make that label unique.
 */
#define ldx(SFX, IDX, SIDE, OFF, WANT)				\
	movi %r0 0						\
	ldxi##SFX %r0 %v0 OFF					\
	beqi ldx##SFX##IDX##SIDE %r0 WANT			\
	calli @abort						\
ldx##SFX##IDX##SIDE:
/* ldx_at(SFX, SIDE, K, WANT): one ldx() check at displacement SIDE##B##K. */
#define ldx_at(SFX, SIDE, K, WANT)				\
	ldx(SFX, K, SIDE, $(SIDE##B##K), WANT)
/*
 * ldx8(T, M, V): load with suffix T from each displacement
 * M##B3 .. M##B26 (M is a or b) and check every result against V.
 */
#define ldx8(T, M, V)						\
	ldx_at(T, M, 3, V)					\
	ldx_at(T, M, 4, V)					\
	ldx_at(T, M, 5, V)					\
	ldx_at(T, M, 6, V)					\
	ldx_at(T, M, 7, V)					\
	ldx_at(T, M, 8, V)					\
	ldx_at(T, M, 9, V)					\
	ldx_at(T, M, 10, V)					\
	ldx_at(T, M, 11, V)					\
	ldx_at(T, M, 12, V)					\
	ldx_at(T, M, 13, V)					\
	ldx_at(T, M, 14, V)					\
	ldx_at(T, M, 15, V)					\
	ldx_at(T, M, 16, V)					\
	ldx_at(T, M, 17, V)					\
	ldx_at(T, M, 18, V)					\
	ldx_at(T, M, 19, V)					\
	ldx_at(T, M, 20, V)					\
	ldx_at(T, M, 21, V)					\
	ldx_at(T, M, 22, V)					\
	ldx_at(T, M, 23, V)					\
	ldx_at(T, M, 24, V)					\
	ldx_at(T, M, 25, V)					\
	ldx_at(T, M, 26, V)
/* ldx4(SFX, SIDE, WANT): the ldx8() sweep preceded by displacement SIDE##B2. */
#define ldx4(SFX, SIDE, WANT)					\
	ldx(SFX, 2, SIDE, $(SIDE##B2), WANT)			\
	ldx8(SFX, SIDE, WANT)
/* ldx2(SFX, SIDE, WANT): the ldx4() sweep preceded by displacement SIDE##B1. */
#define ldx2(SFX, SIDE, WANT)					\
	ldx(SFX, 1, SIDE, $(SIDE##B1), WANT)			\
	ldx4(SFX, SIDE, WANT)
319
/*
 * stf(SFX, IDX, OFF, VAL): floating-point counterpart of stx().  Loads VAL
 * into %f0 with movi##SFX and stores it with stxi##SFX, using OFF as the
 * displacement operand and %v0 as the base register.  IDX is not used in
 * the expansion.
 */
#define stf(SFX, IDX, OFF, VAL)					\
	movi##SFX %f0 VAL					\
	stxi##SFX OFF %v0 %f0
/* stf_at(SFX, SIDE, K, VAL): one stf() at displacement SIDE##B##K. */
#define stf_at(SFX, SIDE, K, VAL)				\
	stf(SFX, K, $(SIDE##B##K), VAL)
/*
 * stf8(T, M, V): store the floating-point value V with suffix T at each
 * displacement M##B3 .. M##B26 (M is a or b).
 */
#define stf8(T, M, V)						\
	stf_at(T, M, 3, V)					\
	stf_at(T, M, 4, V)					\
	stf_at(T, M, 5, V)					\
	stf_at(T, M, 6, V)					\
	stf_at(T, M, 7, V)					\
	stf_at(T, M, 8, V)					\
	stf_at(T, M, 9, V)					\
	stf_at(T, M, 10, V)					\
	stf_at(T, M, 11, V)					\
	stf_at(T, M, 12, V)					\
	stf_at(T, M, 13, V)					\
	stf_at(T, M, 14, V)					\
	stf_at(T, M, 15, V)					\
	stf_at(T, M, 16, V)					\
	stf_at(T, M, 17, V)					\
	stf_at(T, M, 18, V)					\
	stf_at(T, M, 19, V)					\
	stf_at(T, M, 20, V)					\
	stf_at(T, M, 21, V)					\
	stf_at(T, M, 22, V)					\
	stf_at(T, M, 23, V)					\
	stf_at(T, M, 24, V)					\
	stf_at(T, M, 25, V)					\
	stf_at(T, M, 26, V)
/* stf4(SFX, SIDE, VAL): the stf8() sweep preceded by displacement SIDE##B2. */
#define stf4(SFX, SIDE, VAL)					\
	stf(SFX, 2, $(SIDE##B2), VAL)				\
	stf8(SFX, SIDE, VAL)
/*
 * ldf(SFX, IDX, SIDE, OFF, WANT): floating-point counterpart of ldx().
 * Sets %f0 to 0 with movi##SFX, loads into it with ldxi##SFX from base %v0
 * and displacement OFF, then branches with beqi##SFX to the label
 * ldf##SFX##IDX##SIDE when %f0 equals WANT; otherwise calls abort.
 */
#define ldf(SFX, IDX, SIDE, OFF, WANT)				\
	movi##SFX %f0 0						\
	ldxi##SFX %f0 %v0 OFF					\
	beqi##SFX ldf##SFX##IDX##SIDE %f0 WANT			\
	calli @abort						\
ldf##SFX##IDX##SIDE:
/* ldf_at(SFX, SIDE, K, WANT): one ldf() check at displacement SIDE##B##K. */
#define ldf_at(SFX, SIDE, K, WANT)				\
	ldf(SFX, K, SIDE, $(SIDE##B##K), WANT)
/*
 * ldf8(T, M, V): load a floating-point value with suffix T from each
 * displacement M##B3 .. M##B26 (M is a or b) and check every result
 * against V.
 */
#define ldf8(T, M, V)						\
	ldf_at(T, M, 3, V)					\
	ldf_at(T, M, 4, V)					\
	ldf_at(T, M, 5, V)					\
	ldf_at(T, M, 6, V)					\
	ldf_at(T, M, 7, V)					\
	ldf_at(T, M, 8, V)					\
	ldf_at(T, M, 9, V)					\
	ldf_at(T, M, 10, V)					\
	ldf_at(T, M, 11, V)					\
	ldf_at(T, M, 12, V)					\
	ldf_at(T, M, 13, V)					\
	ldf_at(T, M, 14, V)					\
	ldf_at(T, M, 15, V)					\
	ldf_at(T, M, 16, V)					\
	ldf_at(T, M, 17, V)					\
	ldf_at(T, M, 18, V)					\
	ldf_at(T, M, 19, V)					\
	ldf_at(T, M, 20, V)					\
	ldf_at(T, M, 21, V)					\
	ldf_at(T, M, 22, V)					\
	ldf_at(T, M, 23, V)					\
	ldf_at(T, M, 24, V)					\
	ldf_at(T, M, 25, V)					\
	ldf_at(T, M, 26, V)
/* ldf4(SFX, SIDE, WANT): the ldf8() sweep preceded by displacement SIDE##B2. */
#define ldf4(SFX, SIDE, WANT)					\
	ldf(SFX, 2, SIDE, $(SIDE##B2), WANT)			\
	ldf8(SFX, SIDE, WANT)
385
/*
 * ldst_c(): round trip through stxi_c / ldxi_c with the value 0x5a.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB1 .. aB26;
 *   pass b - base buf + M64, displacements bB1 .. bB26.
 */
#define ldst_c()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stx2(_c, a, 0x5a)					\
	ldx2(_c, a, 0x5a)					\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stx2(_c, b, 0x5a)					\
	ldx2(_c, b, 0x5a)
/*
 * ldst_uc(): store 0x5a with stxi_c and read it back with ldxi_uc.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB1 .. aB26;
 *   pass b - base buf + M64, displacements bB1 .. bB26.
 *
 * The refill before pass b matches every other ldst_* macro.  It matters
 * because aB25 from buf and bB25 from buf + M64 address the same byte
 * (buf + (1 << 25)): without the refill, the pass-b load at that
 * displacement could succeed on the value left behind by pass a.
 */
#define ldst_uc()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stx2(_c, a, 0x5a)					\
	ldx2(_uc, a, 0x5a)					\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stx2(_c, b, 0x5a)					\
	ldx2(_uc, b, 0x5a)
/*
 * ldst_s(): round trip through stxi_s / ldxi_s with the value 0x5a5a.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB1 .. aB26;
 *   pass b - base buf + M64, displacements bB1 .. bB26.
 */
#define ldst_s()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stx2(_s, a, 0x5a5a)					\
	ldx2(_s, a, 0x5a5a)					\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stx2(_s, b, 0x5a5a)					\
	ldx2(_s, b, 0x5a5a)
/*
 * ldst_us(): store 0x5a5a with stxi_s and read it back with ldxi_us.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB1 .. aB26;
 *   pass b - base buf + M64, displacements bB1 .. bB26.
 */
#define ldst_us()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stx2(_s, a, 0x5a5a)					\
	ldx2(_us, a, 0x5a5a)					\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stx2(_s, b, 0x5a5a)					\
	ldx2(_us, b, 0x5a5a)
/*
 * ldst_i(): round trip through stxi_i / ldxi_i with the value 0x5a5a5a5a.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB2 .. aB26;
 *   pass b - base buf + M64, displacements bB2 .. bB26.
 */
#define ldst_i()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stx4(_i, a, 0x5a5a5a5a)					\
	ldx4(_i, a, 0x5a5a5a5a)					\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stx4(_i, b, 0x5a5a5a5a)					\
	ldx4(_i, b, 0x5a5a5a5a)
/*
 * ldst_ui(): store 0x5a5a5a5a with stxi_i and read it back with ldxi_ui.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB2 .. aB26;
 *   pass b - base buf + M64, displacements bB2 .. bB26.
 */
#define ldst_ui()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stx4(_i, a, 0x5a5a5a5a)					\
	ldx4(_ui, a, 0x5a5a5a5a)				\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stx4(_i, b, 0x5a5a5a5a)					\
	ldx4(_ui, b, 0x5a5a5a5a)
/*
 * ldst_l(): round trip through stxi_l / ldxi_l with the value
 * 0x5a5a5a5a5a5a5a5a.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB3 .. aB26;
 *   pass b - base buf + M64, displacements bB3 .. bB26.
 */
#define ldst_l()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stx8(_l, a, 0x5a5a5a5a5a5a5a5a)				\
	ldx8(_l, a, 0x5a5a5a5a5a5a5a5a)				\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stx8(_l, b, 0x5a5a5a5a5a5a5a5a)				\
	ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
/*
 * ldst_f(): round trip of the value 0.5 through the _f store and load
 * instructions.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB2 .. aB26;
 *   pass b - base buf + M64, displacements bB2 .. bB26.
 */
#define ldst_f()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stf4(_f, a, 0.5)					\
	ldf4(_f, a, 0.5)					\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stf4(_f, b, 0.5)					\
	ldf4(_f, b, 0.5)
/*
 * ldst_d(): round trip of the value 0.5 through the _d store and load
 * instructions.
 * Two passes, each starting from a buffer refilled with 0xa5:
 *   pass a - base buf, displacements aB3 .. aB26;
 *   pass b - base buf + M64, displacements bB3 .. bB26.
 */
#define ldst_d()						\
	/* pass a: positive displacements from buf */		\
	reset(0xa5)						\
	movi %v0 buf						\
	stf8(_d, a, 0.5)					\
	ldf8(_d, a, 0.5)					\
	/* pass b: negative displacements from buf + M64 */	\
	reset(0xa5)						\
	movi %v0 $(buf + M64)					\
	stf8(_d, b, 0.5)					\
	ldf8(_d, b, 0.5)
466
/*
 * Data section.  67112960 == M64 + 4096.
 * NOTE(review): the .data operand is presumably the number of bytes to
 * reserve for this section -- confirm against the test driver.
 */
.data	67112960
/* buf: M64 bytes plus 8 more, matching the M64 + 8 bytes reset() fills. */
buf:
.size	M64
.size	8
/* ok: the string passed to puts once every check has passed. */
ok:
.c	"ok"
473
/*
 * Test body.  Every check calls abort on a mismatch, so reaching the final
 * puts means all checks passed.
 */
.code
	prolog

	/* arithmetic with an immediate operand */
	alu(add)
	alu(sub)
	alu(rsb)
	alu(mul)
	alu(div)
	alu(rem)

	/* shifts by every count */
	lsh()
	rsh()

	/* bitwise logic with an immediate operand */
	alu(and)
	alu(or)
	alu(xor)

	/* integer loads and stores at large displacements */
	ldst_c()
	ldst_uc()
	ldst_s()
	ldst_us()
	ldst_i()
#if __WORDSIZE == 64
	/* these two sweeps are only assembled for a 64-bit word size */
	ldst_ui()
	ldst_l()
#endif

	/* floating-point loads and stores at large displacements */
	ldst_f()
	ldst_d()

	/* report success */
	prepare
		pushargi ok
	finishi @puts
	ret
	epilog
505