/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   Yabause - linkage_x86_64.s                                            *
 *   Copyright (C) 2009-2011 Ari64                                         *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
	.file	"linkage_x86_64.s"
	.bss
	.align 4
	.section	.rodata
	.text
.globl YabauseDynarecOneFrameExec
	.type	YabauseDynarecOneFrameExec, @function
YabauseDynarecOneFrameExec:
/* (arg1/edi - m68kcycles) */
/* (arg2/esi - m68kcenticycles) */
	push	%rbp
	mov	%rsp, %rbp
	mov	master_ip, %rax
	xor	%ecx, %ecx
	push	%rbx
	push	%r12
	push	%r13
	push	%r14
	push	%r15
	push	%rcx /* zero */
	push	%rcx
	push	%rcx
	push	%rcx
	call	.+5
	mov	%esi,-60(%rbp) /* m68kcenticycles */
	mov	%edi,-64(%rbp) /* m68kcycles */
	mov	%rax,-80(%rbp) /* overwrite return address */
/* Stack frame:
   return address (0)
   rbp (8/0)
   save rbx (16/8)
   save r12 (24/16)
   save r13 (32/24)
   save r14 (40/32)
   save r15 (48/40)
   decilinecount (52/44)
   decilinecycles (56/48)
   sh2cycles (60/52)
   scucycles (64/56)
   m68kcenticycles (68/60)
   m68kcycles (72/64)
   space for alignment (80/72)
   ret address/master_ip (88/80) (alternate rsp at call)
   save %rax (96/88)
   save %rcx (104/96)
   save %rdx (112/104)
   save %rsi (120/112)
   save %rdi (128/120)
   space for alignment (136/128) (rsp at call)
   next return address (144/136)
   total = 144 */
/*   usecinc?
   cyclesinc?*/

newline:
/* const u32 decilinecycles = yabsys.DecilineStop >> YABSYS_TIMING_BITS; */
/* const u32 cyclesinc = yabsys.DecilineStop * 10; */
	mov	decilinestop_p, %rax
	mov	yabsys_timing_bits, %ecx
	mov	(%rax), %eax
	lea	(%eax,%eax,4), %ebx /* decilinestop*5 */
	shr	%cl, %eax /* decilinecycles */
	shl	%ebx	/* cyclesinc=decilinestop*10 */
	lea	(%eax,%eax,8), %edx  /* decilinecycles*9 */
        /* yabsys.SH2CycleFrac += cyclesinc;*/
        /* sh2cycles = (yabsys.SH2CycleFrac >> (YABSYS_TIMING_BITS + 1)) << 1;*/
        /* yabsys.SH2CycleFrac &= ((YABSYS_TIMING_MASK << 1) | 1);*/
	mov	SH2CycleFrac_p, %rsi
	mov	yabsys_timing_mask, %edi
	inc	%ecx /* yabsys_timing_bits+1 */
	add	(%rsi), %ebx /* SH2CycleFrac */
	stc
	adc	%edi, %edi /* ((YABSYS_TIMING_MASK << 1) | 1) */
	mov	%eax, -48(%rbp) /* decilinecycles */
	and	%ebx, %edi
	mov	%edi, (%rsi) /* SH2CycleFrac */
	shr	%cl, %ebx
	mov	%ebx, -56(%rbp) /* scucycles */
	add	%ebx, %ebx /* sh2cycles */
	mov	MSH2, %rax
	mov	NumberOfInterruptsOffset, %ecx
	sub	%edx, %ebx  /* sh2cycles(full line) - decilinecycles*9 */
	mov	%ebx, -52(%rbp) /* sh2cycles */
	cmpl	$0, (%rax, %rcx)
	jne	master_handle_interrupts
	mov	master_cc, %esi
	sub	%ebx, %esi
	ret	/* jmp master_ip */
	.size	YabauseDynarecOneFrameExec, .-YabauseDynarecOneFrameExec
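/* Reference-only C sketch of the per-line timing setup above.  It mirrors
   the pseudocode comments and the register arithmetic, not necessarily the
   exact yabause.c source:

     u32 decilinecycles = yabsys.DecilineStop >> YABSYS_TIMING_BITS;
     u32 cyclesinc      = yabsys.DecilineStop * 10;

     yabsys.SH2CycleFrac += cyclesinc;
     scucycles = yabsys.SH2CycleFrac >> (YABSYS_TIMING_BITS + 1);
     sh2cycles = scucycles * 2;                      // whole-line SH2 budget
     yabsys.SH2CycleFrac &= (YABSYS_TIMING_MASK << 1) | 1;

     // deciline 0 runs whatever is left once the other nine decilines
     // (decilinecycles each) are accounted for:
     sh2cycles -= decilinecycles * 9;
*/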

.globl master_handle_interrupts
	.type	master_handle_interrupts, @function
master_handle_interrupts:
	mov	-80(%rbp), %rax /* get return address */
	mov	%rax, master_ip
	call	DynarecMasterHandleInterrupts
	mov	master_ip, %rax
	mov	master_cc, %esi
	mov	%rax,-80(%rbp) /* overwrite return address */
	sub	%ebx, %esi
	ret	/* jmp master_ip */
	.size	master_handle_interrupts, .-master_handle_interrupts

.globl slave_entry
	.type	slave_entry, @function
slave_entry:
	mov	28(%rsp), %ebx /* sh2cycles */
	mov	%esi, master_cc
	mov	%ebx, %edi
	call	FRTExec
	mov	%ebx, %edi
	call	WDTExec
	mov	slave_ip, %rdx
	test	%edx, %edx
	je	cc_interrupt_master /* slave not running */
	mov	SSH2, %rax
	mov	NumberOfInterruptsOffset, %ecx
	cmpl	$0, (%rax, %rcx)
	jne	slave_handle_interrupts
	mov	slave_cc, %esi
	sub	%ebx, %esi
	jmp	*%rdx /* jmp *slave_ip */
	.size	slave_entry, .-slave_entry

.globl slave_handle_interrupts
	.type	slave_handle_interrupts, @function
slave_handle_interrupts:
	call	DynarecSlaveHandleInterrupts
	mov	slave_ip, %rdx
	mov	slave_cc, %esi
	sub	%ebx, %esi
	jmp	*%rdx /* jmp *slave_ip */
	.size	slave_handle_interrupts, .-slave_handle_interrupts

.globl cc_interrupt
	.type	cc_interrupt, @function
cc_interrupt: /* slave */
	mov	28(%rsp), %ebx /* sh2cycles */
	mov	%rbp, slave_ip
	mov	%esi, slave_cc
	mov	%ebx, %edi
	call	FRTExec
	mov	%ebx, %edi
	call	WDTExec
	.size	cc_interrupt, .-cc_interrupt
.globl cc_interrupt_master
	.type	cc_interrupt_master, @function
cc_interrupt_master:
	lea	80(%rsp), %rbp
	mov	-44(%rbp), %eax /* decilinecount */
	mov	-48(%rbp), %ebx /* decilinecycles */
	inc	%eax
	cmp	$9, %eax
	ja	.A3
	mov	%eax, -44(%rbp) /* decilinecount++ */
	je	.A2
	mov	%ebx, -52(%rbp) /* sh2cycles */
.A1:
	mov	master_cc, %esi
	mov	MSH2, %rax
	mov	NumberOfInterruptsOffset, %ecx
	cmpl	$0, (%rax, %rcx)
	jne	master_handle_interrupts
	sub	%ebx, %esi
	ret	/* jmp master_ip */
.A2:
	call	Vdp2HBlankIN
	jmp	.A1
.A3:
	mov	-56(%rbp), %edi /* scucycles */
	call	ScuExec
	call	M68KSync
	call	Vdp2HBlankOUT
	call	ScspExec
	mov	linecount_p, %rbx
	mov	maxlinecount_p, %rax
	mov	vblanklinecount_p, %rcx
	mov	(%rbx), %edx
	mov	(%rax), %eax
	mov	(%rcx), %ecx
	inc	%edx
	andl	$0, -44(%rbp) /* decilinecount=0 */
	cmp	%eax, %edx /* max ? */
	je	nextframe
	mov	%edx, (%rbx) /* linecount++ */
	cmp	%ecx, %edx /* vblank ? */
	je	vblankin
nextline:
	call	finishline
	jmp	newline
finishline:
      /* const u32 usecinc = yabsys.DecilineUsec * 10; */
	mov	decilineusec_p, %rax
	mov	UsecFrac_p, %rbx
	mov	yabsys_timing_bits, %ecx
	mov	(%rax), %eax
	mov	(%rbx), %edx
	lea	(%eax,%eax,4), %edi
	add	%edi, %edi
      /* yabsys.UsecFrac += usecinc; */
	add	%edx, %edi
	add	$-8, %rsp /* Align stack */
      /* SmpcExec(yabsys.UsecFrac >> YABSYS_TIMING_BITS); */
      /* Cs2Exec(yabsys.UsecFrac >> YABSYS_TIMING_BITS); */
      /* yabsys.UsecFrac &= YABSYS_TIMING_MASK; */
	mov	%edi, (%rbx) /* UsecFrac */
	shr	%cl, %edi
	call	SmpcExec
	/* SmpcExec may modify UsecFrac; must reload it */
	mov	yabsys_timing_mask, %r12d
	mov	(%rbx), %edi /* UsecFrac */
	mov	yabsys_timing_bits, %ecx
	and	%edi, %r12d
	shr	%cl, %edi
	call	Cs2Exec
	mov	%r12d, (%rbx) /* UsecFrac */
	mov	saved_centicycles, %ecx
	mov	-60(%rbp), %ebx /* m68kcenticycles */
	mov	-64(%rbp), %edi /* m68kcycles */
	add	%ebx, %ecx
	mov	%ecx, %ebx
	add	$-100, %ecx
	cmovnc	%ebx, %ecx
	adc	$0, %edi
	mov	%ecx, saved_centicycles
	call	M68KExec
	add	$8, %rsp /* Undo stack alignment */
	ret
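/* Reference-only C sketch of finishline above (names follow the symbols the
   code references; the 68K cycle count is carried as whole cycles plus
   "centicycles", i.e. hundredths of a cycle):

     u32 usecinc = yabsys.DecilineUsec * 10;
     yabsys.UsecFrac += usecinc;
     SmpcExec(yabsys.UsecFrac >> YABSYS_TIMING_BITS);
     // SmpcExec may modify UsecFrac, so it is reloaded before Cs2Exec
     Cs2Exec(yabsys.UsecFrac >> YABSYS_TIMING_BITS);
     yabsys.UsecFrac &= YABSYS_TIMING_MASK;

     u32 cycles = m68kcycles;                      // entry arguments
     u32 centi  = saved_centicycles + m68kcenticycles;
     if (centi >= 100) { centi -= 100; cycles++; } // carry whole cycles
     saved_centicycles = centi;
     M68KExec(cycles);
*/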
vblankin:
	call	SmpcINTBACKEnd
	call	Vdp2VBlankIN
	call	CheatDoPatches
	jmp	nextline
nextframe:
	call	Vdp2VBlankOUT
	andl	$0, (%rbx) /* linecount = 0 */
	call	finishline
	call	M68KSync
	mov	rccount, %esi
	inc	%esi
	andl	$0, invalidate_count
	and	$0x3f, %esi
	cmpl	$0, restore_candidate(,%esi,4)
	mov	%esi, rccount
	jne	.A5
.A4:
	mov	(%rsp), %rax
	add	$40, %rsp
	mov	%rax, master_ip
	pop	%r15 /* restore callee-save registers */
	pop	%r14
	pop	%r13
	pop	%r12
	pop	%rbx
	pop	%rbp
	ret
.A5:
	/* Move 'dirty' blocks to the 'clean' list */
	mov	restore_candidate(,%esi,4), %ebx
	mov	%esi, %ebp
	andl	$0, restore_candidate(,%esi,4)
	shl	$5, %ebp
.A6:
	shr	$1, %ebx
	jnc	.A7
	mov	%ebp, %edi
	call	clean_blocks
.A7:
	inc	%ebp
	test	$31, %ebp
	jne	.A6
	jmp	.A4
	.size	cc_interrupt_master, .-cc_interrupt_master
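/* Reference-only sketch of the .A5/.A6 loop above: each frame scans one
   32-bit word of the restore_candidate bitmap (index i = rccount & 0x3f)
   and hands every set bit to clean_blocks() so the corresponding blocks
   can move back from the 'dirty' list to the 'clean' list:

     u32 bits = restore_candidate[i];
     restore_candidate[i] = 0;
     for (j = 0; j < 32; j++)
         if (bits & (1u << j))
             clean_blocks(i * 32 + j);
*/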

.globl dyna_linker
	.type	dyna_linker, @function
dyna_linker:
	/* eax = virtual target address */
	/* ebx = instruction to patch */
	mov	%eax, %ecx
	mov	$1023, %edx
	shr	$12, %ecx
	and	%ecx, %edx
	and	$0xDFFFF, %ecx
	or	$1024, %edx
	cmp	%edx, %ecx
	cmova	%edx, %ecx
	/* jump_in lookup */
	movq	jump_in(,%ecx,8), %r12
.B1:
	test	%r12, %r12
	je	.B3
	mov	(%r12), %edi
	xor	%eax, %edi
	je	.B2
	movq	16(%r12), %r12
	jmp	.B1
.B2:
	mov	(%ebx), %edi
	mov	%esi, %ebp
	lea	4(%ebx,%edi,1), %esi
	mov	%eax, %edi
	call	add_link
	mov	8(%r12), %edi
	mov	%ebp, %esi
	lea	-4(%edi), %edx
	subl	%ebx, %edx
	movl	%edx, (%ebx)
	jmp	*%rdi
.B3:
	/* hash_table lookup */
	mov	%eax, %edi
	shr	$16, %edi
	xor	%eax, %edi
	movzwl	%di, %edi
	shl	$4, %edi
	cmp	hash_table(%edi), %eax
	jne	.B5
.B4:
	mov	hash_table+4(%edi), %edx
	jmp	*%rdx
.B5:
	cmp	hash_table+8(%edi), %eax
	lea	8(%edi), %edi
	je	.B4
	/* jump_dirty lookup */
	movq	jump_dirty(,%ecx,8), %r12
.B6:
	test	%r12, %r12
	je	.B8
	mov	(%r12), %ecx
	xor	%eax, %ecx
	je	.B7
	movq	16(%r12), %r12
	jmp	.B6
.B7:
	movl	8(%r12), %edx
	/* hash_table insert */
	mov	hash_table-8(%edi), %ebx
	mov	hash_table-4(%edi), %ecx
	mov	%eax, hash_table-8(%edi)
	mov	%edx, hash_table-4(%edi)
	mov	%ebx, hash_table(%edi)
	mov	%ecx, hash_table+4(%edi)
	jmp	*%rdx
.B8:
	mov	%eax, %edi
	mov	%eax, %ebp /* Note: relies on %rbp and %r12 being callee-saved */
	mov	%esi, %r12d
	call	sh2_recompile_block
	test	%eax, %eax
	mov	%ebp, %eax
	mov	%r12d, %esi
	je	dyna_linker
	/* shouldn't happen */
	int3
	.size	dyna_linker, .-dyna_linker
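/* Reference-only sketch of the lookup order implemented above.  Offsets as
   used by the code: list entries keep the virtual address at +0, the code
   pointer at +8 and the next pointer at +16; each 16-byte hash_table bucket
   holds two (vaddr, code pointer) pairs and is indexed by
   ((vaddr >> 16) ^ vaddr) & 0xFFFF.  Code pointers are loaded as 32-bit
   values, which assumes the translation cache sits in low memory:

     for (e = jump_in[page]; e; e = e->next)      // verified blocks
         if (e->vaddr == vaddr) { add_link(...); patch caller; goto e->addr; }
     if (hash_table bucket matches vaddr)         // fast-path cache
         goto cached code pointer;
     for (e = jump_dirty[page]; e; e = e->next)   // unverified blocks
         if (e->vaddr == vaddr) { insert into hash_table; goto e->addr; }
     if (sh2_recompile_block(vaddr) == 0)         // nothing found: compile
         restart the whole lookup;
     else
         int3;                                    // shouldn't happen
*/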

.globl jump_vaddr_eax_master
	.type	jump_vaddr_eax_master, @function
jump_vaddr_eax_master:
	mov	%eax, %edi
	jmp	jump_vaddr_edi_master
	.size	jump_vaddr_eax_master, .-jump_vaddr_eax_master
.globl jump_vaddr_ecx_master
	.type	jump_vaddr_ecx_master, @function
jump_vaddr_ecx_master:
	mov	%ecx, %edi
	jmp	jump_vaddr_edi_master
	.size	jump_vaddr_ecx_master, .-jump_vaddr_ecx_master
.globl jump_vaddr_edx_master
	.type	jump_vaddr_edx_master, @function
jump_vaddr_edx_master:
	mov	%edx, %edi
	jmp	jump_vaddr_edi_master
	.size	jump_vaddr_edx_master, .-jump_vaddr_edx_master
.globl jump_vaddr_ebx_master
	.type	jump_vaddr_ebx_master, @function
jump_vaddr_ebx_master:
	mov	%ebx, %edi
	jmp	jump_vaddr_edi_master
	.size	jump_vaddr_ebx_master, .-jump_vaddr_ebx_master
.globl jump_vaddr_ebp_master
	.type	jump_vaddr_ebp_master, @function
jump_vaddr_ebp_master:
	mov	%ebp, %edi
	jmp	jump_vaddr_edi_master
	.size	jump_vaddr_ebp_master, .-jump_vaddr_ebp_master
.globl jump_vaddr_eax_slave
	.type	jump_vaddr_eax_slave, @function
jump_vaddr_eax_slave:
	mov	%eax, %edi
	jmp	jump_vaddr_edi_slave
	.size	jump_vaddr_eax_slave, .-jump_vaddr_eax_slave
.globl jump_vaddr_ecx_slave
	.type	jump_vaddr_ecx_slave, @function
jump_vaddr_ecx_slave:
	mov	%ecx, %edi
	jmp	jump_vaddr_edi_slave
	.size	jump_vaddr_ecx_slave, .-jump_vaddr_ecx_slave
.globl jump_vaddr_edx_slave
	.type	jump_vaddr_edx_slave, @function
jump_vaddr_edx_slave:
	mov	%edx, %edi
	jmp	jump_vaddr_edi_slave
	.size	jump_vaddr_edx_slave, .-jump_vaddr_edx_slave
.globl jump_vaddr_ebx_slave
	.type	jump_vaddr_ebx_slave, @function
jump_vaddr_ebx_slave:
	mov	%ebx, %edi
	jmp	jump_vaddr_edi_slave
	.size	jump_vaddr_ebx_slave, .-jump_vaddr_ebx_slave
.globl jump_vaddr_ebp_slave
	.type	jump_vaddr_ebp_slave, @function
jump_vaddr_ebp_slave:
	mov	%ebp, %edi
	.size	jump_vaddr_ebp_slave, .-jump_vaddr_ebp_slave
.globl jump_vaddr_edi_slave
	.type	jump_vaddr_edi_slave, @function
jump_vaddr_edi_slave:
	or	$1, %edi
	.size	jump_vaddr_edi_slave, .-jump_vaddr_edi_slave
.globl jump_vaddr_edi_master
	.type	jump_vaddr_edi_master, @function
jump_vaddr_edi_master:
	mov	%edi, %eax
	.size	jump_vaddr_edi_master, .-jump_vaddr_edi_master

.globl jump_vaddr
	.type	jump_vaddr, @function
jump_vaddr:
  /* Check hash table */
	shr	$16, %eax
	xor	%edi, %eax
	movzwl	%ax, %eax
	shl	$4, %eax
	cmp	hash_table(%eax), %edi
	jne	.C2
.C1:
	mov	hash_table+4(%eax), %edi
	jmp	*%rdi
.C2:
	cmp	hash_table+8(%eax), %edi
	lea	8(%eax), %eax
	je	.C1
  /* No hit on hash table, call compiler */
	mov	%esi, %ebx /* CCREG */
	call	get_addr
	mov	%ebx, %esi
	jmp	*%rax
	.size	jump_vaddr, .-jump_vaddr

.globl verify_code
	.type	verify_code, @function
verify_code:
	/* rax = source */
	/* ebx = target */
	/* ecx = length */
	/* r12d = instruction pointer */
	mov	-4(%rax,%rcx,1), %edi
	xor	-4(%ebx,%ecx,1), %edi
	jne	.D4
	mov	%ecx, %edx
	add	$-4, %ecx
	je	.D3
	test	$4, %edx
	cmove	%edx, %ecx
.D2:
	mov	-8(%rax,%rcx,1), %rdi
	cmp	-8(%ebx,%ecx,1), %rdi
	jne	.D4
	add	$-8, %ecx
	jne	.D2
.D3:
	ret
.D4:
	add	$8, %rsp /* pop return address, we're not returning */
	mov	%r12d, %edi
	mov	%esi, %ebx
	call	get_addr
	mov	%ebx, %esi
	jmp	*%rax
	.size	verify_code, .-verify_code
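/* Reference-only sketch of verify_code: a compiled block keeps a copy of
   the SH-2 code it was translated from, and that copy is compared against
   current SH-2 memory before a 'dirty' block is reused.  On a match it
   simply returns; on a mismatch it discards its own return address and
   jumps to a fresh entry point instead:

     if (memcmp(source, target, length) == 0)
         return;                      // code unchanged, keep running block
     jump to get_addr(ip);            // stale block: resolve/recompile
*/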
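/* The three WriteInvalidate* helpers below share one pattern (reference-only
   C sketch; cached_code is a bitmap with one bit per 4KB page):

     void WriteInvalidateLong(u32 addr, u32 val) {
         if (cached_code bit for (addr >> 12) is set)  // page holds compiled code
             invalidate_addr(addr);                    // drop affected blocks
         MappedMemoryWriteLongNocache(addr, val);
     }

   WriteInvalidateByteSwapped additionally flips bit 0 of the address before
   falling through into WriteInvalidateByte. */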
.globl WriteInvalidateLong
	.type	WriteInvalidateLong, @function
WriteInvalidateLong:
	mov	%edi, %ecx
	shr	$12, %ecx
	bt	%ecx, cached_code
	jnc	MappedMemoryWriteLongNocache
	/*push	%rax*/
	/*push	%rcx*/
	push	%rdx /* unused, for stack alignment */
	push	%rsi
	push	%rdi
	call	invalidate_addr
	pop	%rdi
	pop	%rsi
	pop	%rdx /* unused, for stack alignment */
	/*pop	%rcx*/
	/*pop	%rax*/
	jmp	MappedMemoryWriteLongNocache
	.size	WriteInvalidateLong, .-WriteInvalidateLong
.globl WriteInvalidateWord
	.type	WriteInvalidateWord, @function
WriteInvalidateWord:
	mov	%edi, %ecx
	shr	$12, %ecx
	bt	%ecx, cached_code
	jnc	MappedMemoryWriteWordNocache
	/*push	%rax*/
	/*push	%rcx*/
	push	%rdx /* unused, for stack alignment */
	push	%rsi
	push	%rdi
	call	invalidate_addr
	pop	%rdi
	pop	%rsi
	pop	%rdx /* unused, for stack alignment */
	/*pop	%rcx*/
	/*pop	%rax*/
	jmp	MappedMemoryWriteWordNocache
	.size	WriteInvalidateWord, .-WriteInvalidateWord
.globl WriteInvalidateByteSwapped
	.type	WriteInvalidateByteSwapped, @function
WriteInvalidateByteSwapped:
	xor	$1, %edi
	.size	WriteInvalidateByteSwapped, .-WriteInvalidateByteSwapped
.globl WriteInvalidateByte
	.type	WriteInvalidateByte, @function
WriteInvalidateByte:
	mov	%edi, %ecx
	shr	$12, %ecx
	bt	%ecx, cached_code
	jnc	MappedMemoryWriteByteNocache
	/*push	%rax*/
	/*push	%rcx*/
	push	%rdx /* unused, for stack alignment */
	push	%rsi
	push	%rdi
	call	invalidate_addr
	pop	%rdi
	pop	%rsi
	pop	%rdx /* unused, for stack alignment */
	/*pop	%rcx*/
	/*pop	%rax*/
	jmp	MappedMemoryWriteByteNocache
	.size	WriteInvalidateByte, .-WriteInvalidateByte

.globl div1
	.type	div1, @function
div1:
	/* eax = dividend */
	/* ecx = divisor */
	/* edx = sr */
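	/* Reference-only C sketch of one SH-2 DIV1 step (non-restoring
	   division), condensed from the usual manual-style pseudocode; the
	   branch-free code below computes the same Rn/Q/T.  M is SR bit 9,
	   Q is SR bit 8, T is SR bit 0, and Rn/Rm are u32 (unsigned compares):

	     old_q = Q;
	     Q  = Rn >> 31;                 // bit shifted out of Rn
	     Rn = (Rn << 1) | T;
	     if (old_q == M) { tmp = Rn; Rn -= Rm; cb = (Rn > tmp); }  // borrow
	     else            { tmp = Rn; Rn += Rm; cb = (Rn < tmp); }  // carry
	     Q = Q ^ cb ^ M;
	     T = (Q == M);                  // next quotient bit
	*/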
	bt	$9, %edx   /* M bit */
	jc	div1_negative_divisor
	bts	$0, %edx   /* Get T bit and set */
	adc	%eax, %eax /* rn=(rn<<1)+T */
	adc	%ebx, %ebx /* New Q in ebx */
	mov	%ecx, %ebp
	btr	$8, %edx   /* Get Q bit and clear it */
	cmc
	sbb	%edi, %edi /* 0xFFFFFFFF if old_Q clear, 0 otherwise */
	sbb	$0, %ebp
	xor	%edi, %ebp
	add	%ebp, %eax /* rn+rm if old_Q, rn-rm if !old_Q */
		           /* carry set if rn < old_rn */
	adc	%edi, %ebx /* low bit = (rn<old_rn)^new_Q^!old_Q */
	                   /* inverted for old_Q==0, ie (rn>=old_rn)^new_Q */
	not	%edi	   /* if old_Q clear, edi=0 */
	or	%ebp, %edi /* zero if old_Q==0 && rn==old_rn */
	neg	%edi       /* clear carry if edi==0 */
	adc	$-1, %ebx  /* invert result for old_Q==0 && rn==old_rn */
	and	$1, %ebx
	xor	%ebx, %edx /* New T = (Q==M) */
	shl	$8, %ebx
	or	%ebx, %edx /* save new Q */
/*
	push	%edx
	push	%eax
	push	%ecx
	call	debug_division
	pop	%ecx
	pop	%eax
	pop	%edx
*/
	ret
div1_negative_divisor:
	btr	$0, %edx   /* Get T bit and clear */
	adc	%eax, %eax /* rn=(rn<<1)+T */
	adc	%ebx, %ebx /* New Q in ebx */
	mov	%ecx, %ebp
	btr	$8, %edx   /* Get Q bit and clear it */
	sbb	%edi, %edi /* 0xFFFFFFFF if old_Q set, 0 otherwise */
	sbb	$0, %ebp
	xor	%edi, %ebp
	not	%edi	   /* if old_Q clear, edi=-1 */
	add	%ebp, %eax /* rn+rm if !old_Q, rn-rm if old_Q */
		           /* carry set if rn < old_rn */
	adc	%edi, %ebx /* low bit = (rn<old_rn)^new_Q^!old_Q */
	                   /* inverted for old_Q==0, ie (rn>=old_rn)^new_Q */
	or	%ebp, %edi /* zero if old_Q==1 && rn==old_rn */
	neg	%edi       /* clear carry if edi==0 */
	adc	$-1, %ebx  /* invert result for old_Q==1 && rn==old_rn */
	and	$1, %ebx
	xor	%ebx, %edx /* New T = (Q==M) */
	shl	$8, %ebx
	or	%ebx, %edx /* save new Q */
	ret
	.size	div1, .-div1

.globl macl
	.type	macl, @function
macl:
	/* ebx = sr */
	/* ebp = multiplicand address */
	/* edi = multiplicand address */
	/* eax = return MACL */
	/* edx = return MACH */
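	/* Reference-only C sketch of what this helper computes for
	   MAC.L @Rm+,@Rn+ (the two operand addresses arrive in edi/ebp and
	   come back incremented by 4; MACH:MACL arrive in edx:eax):

	     s32 a = MappedMemoryReadLongNocache(addr0);
	     s32 b = MappedMemoryReadLongNocache(addr1);
	     s64 mac = ((s64)(s32)MACH << 32) | (u32)MACL;
	     mac += (s64)a * (s64)b;
	     if (sr & 2) {                        // S bit: saturate to 48 bits
	         if (mac < (s64)0xFFFF800000000000LL) mac = (s64)0xFFFF800000000000LL;
	         if (mac > 0x00007FFFFFFFFFFFLL)      mac = 0x00007FFFFFFFFFFFLL;
	     }
	     MACH = (u32)(mac >> 32);  MACL = (u32)mac;
	*/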
	mov	%edx, %r12d /* MACH */
	mov	%eax, %r13d /* MACL */
	mov	%ebp, %r14d
	mov	%edi, %r15d
	call	MappedMemoryReadLongNocache
	mov	%eax, %esi
	mov	%r14d, %edi
	call	MappedMemoryReadLongNocache
	lea	4(%r14), %ebp
	lea	4(%r15), %edi
	imul	%esi
	add	%r13d, %eax /* MACL */
	adc	%r12d, %edx /* MACH */
	test	$0x2, %bl
	jne	macl_saturation
	ret
macl_saturation:
	mov	$0xFFFF8000, %esi
	xor	%ecx, %ecx
	cmp	%esi, %edx
	cmovl	%esi, %edx
	cmovl	%ecx, %eax
	not	%esi
	not	%ecx
	cmp	%esi, %edx
	cmovg	%esi, %edx
	cmovg	%ecx, %eax
	ret
	.size	macl, .-macl

.globl macw
	.type	macw, @function
macw:
	/* ebx = sr */
	/* ebp = multiplicand address */
	/* edi = multiplicand address */
	/* eax = return MACL */
	/* edx = return MACH */
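	/* Reference-only C sketch for MAC.W @Rm+,@Rn+ (same register interface
	   as macl above, but 16-bit operands and addresses incremented by 2):

	     s32 a = (s16)MappedMemoryReadWordNocache(addr0);
	     s32 b = (s16)MappedMemoryReadWordNocache(addr1);
	     s64 prod = (s64)a * (s64)b;
	     if (sr & 2) {                    // S bit: saturate MACL to 32 bits
	         s64 sum = prod + (s64)(s32)MACL;
	         if (sum >  0x7FFFFFFFLL) sum =  0x7FFFFFFFLL;
	         if (sum < -0x80000000LL) sum = -0x80000000LL;
	         MACL = (u32)sum;             // MACH is left unchanged here
	     } else {
	         u64 mac = ((u64)MACH << 32) | (u32)MACL;
	         mac += (u64)prod;
	         MACH = (u32)(mac >> 32);  MACL = (u32)mac;
	     }
	*/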
	mov	%edx, %r12d /* MACH */
	mov	%eax, %r13d /* MACL */
	mov	%ebp, %r14d
	mov	%edi, %r15d
	call	MappedMemoryReadWordNocache
	movswl	%ax, %esi
	mov	%r14d, %edi
	call	MappedMemoryReadWordNocache
	movswl	%ax, %eax
	lea	2(%r14), %ebp
	lea	2(%r15), %edi
	imul	%esi
	test	$0x2, %bl
	jne	macw_saturation
	add	%r13d, %eax /* MACL */
	adc	%r12d, %edx /* MACH */
	ret
macw_saturation:
	mov	%r13d, %esi
	sar	$31, %esi
	add	%r13d, %eax /* MACL */
	adc	%esi, %edx
	mov	$0x80000000, %esi
	mov	$0x7FFFFFFF, %ecx
	add	%eax, %esi
	adc	$0, %edx
	cmovne	%ecx, %eax
	not	%ecx
	cmovl	%ecx, %eax
	mov	%r12d, %edx
	ret
	.size	macw, .-macw

.globl master_handle_bios
	.type	master_handle_bios, @function
master_handle_bios:
	mov	(%rsp), %rdx /* get return address */
	mov	%eax, master_pc
	mov	%esi, master_cc
	mov	%rdx, master_ip
	mov	MSH2, %rdi
	call	BiosHandleFunc
	mov	master_ip, %rdx
	mov	master_cc, %esi
	mov	%rdx, (%rsp)
	ret	/* jmp *master_ip */
	.size	master_handle_bios, .-master_handle_bios

.globl slave_handle_bios
	.type	slave_handle_bios, @function
slave_handle_bios:
	pop	%rdx /* get return address */
	mov	%eax, slave_pc
	mov	%esi, slave_cc
	mov	%rdx, slave_ip
	mov	SSH2, %rdi
	call	BiosHandleFunc
	mov	slave_ip, %rdx
	mov	slave_cc, %esi
	jmp	*%rdx /* jmp *slave_ip */
	.size	slave_handle_bios, .-slave_handle_bios

.globl breakpoint
	.type	breakpoint, @function
breakpoint:
	ret
	/* Set breakpoint here for debugging */
	.size	breakpoint, .-breakpoint