1;; libgcc routines for the Renesas H8/300 CPU.
2;; Contributed by Steve Chamberlain <sac@cygnus.com>
3;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
4
5/* Copyright (C) 1994-2022 Free Software Foundation, Inc.
6
7This file is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
9Free Software Foundation; either version 3, or (at your option) any
10later version.
11
12This file is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15General Public License for more details.
16
17Under Section 7 of GPL version 3, you are granted additional
18permissions described in the GCC Runtime Library Exception, version
193.1, as published by the Free Software Foundation.
20
21You should have received a copy of the GNU General Public License and
22a copy of the GCC Runtime Library Exception along with this program;
23see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24<http://www.gnu.org/licenses/>.  */
25
26/* Assembler register definitions.

   A0-A3 name r0-r3 and S0-S2 name r4-r6; the L and H suffixes select
   the low and high byte of the 16-bit register.  The P-suffixed names
   are the register form used with PUSHP/POPP: rN with push/pop on the
   H8/300, erN with push.l/pop.l on the H8/300H, H8S and H8SX.  The
   E-suffixed names (e0-e3) are only defined for the latter group.
   NOTE(review): A presumably means argument/scratch and S saved
   register - confirm against the H8/300 calling convention.  */
27
28#define A0 r0
29#define A0L r0l
30#define A0H r0h
31
32#define A1 r1
33#define A1L r1l
34#define A1H r1h
35
36#define A2 r2
37#define A2L r2l
38#define A2H r2h
39
40#define A3 r3
41#define A3L r3l
42#define A3H r3h
43
44#define S0 r4
45#define S0L r4l
46#define S0H r4h
47
48#define S1 r5
49#define S1L r5l
50#define S1H r5h
51
52#define S2 r6
53#define S2L r6l
54#define S2H r6h
55
/* Plain H8/300: registers are 16 bits wide.  */
56#ifdef __H8300__
57#define PUSHP	push
58#define POPP	pop
59
60#define A0P	r0
61#define A1P	r1
62#define A2P	r2
63#define A3P	r3
64#define S0P	r4
65#define S1P	r5
66#define S2P	r6
67#endif
68
/* H8/300H and later: the P names are the 32-bit erN registers and the
   E names are the e0-e3 registers.  */
69#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
70#define PUSHP	push.l
71#define POPP	pop.l
72
73#define A0P	er0
74#define A1P	er1
75#define A2P	er2
76#define A3P	er3
77#define S0P	er4
78#define S1P	er5
79#define S2P	er6
80
81#define A0E	e0
82#define A1E	e1
83#define A2E	e2
84#define A3E	e3
85#endif
86
/* Symbol-name helpers.  LABEL(X) pastes __USER_LABEL_PREFIX__ and two
   underscores in front of X, giving the assembler-level name of the
   libgcc routine X.  LABEL_DEF(X) builds the same name followed by a
   colon, for use where the label is defined.  Going through the extra
   LABEL0/CONCAT level lets __USER_LABEL_PREFIX__ be macro-expanded
   before the tokens are pasted together.  */
87#define CONCAT(A,B)     A##B
88#define LABEL0(U,X)    CONCAT(U,__##X)
89#define LABEL0_DEF(U,X)    CONCAT(U,__##X##:)
90#define LABEL_DEF(X)       LABEL0_DEF(__USER_LABEL_PREFIX__,X)
91#define LABEL(X)       LABEL0(__USER_LABEL_PREFIX__,X)
92
/* Emit the assembler CPU-mode directive that matches the compiler
   target macro: .h8300h, .h8300s or .h8300sx, or the corresponding
   "n" variant when __NORMAL_MODE__ is defined.  No directive is
   emitted for the plain H8/300.  */
93#ifdef __H8300H__
94#ifdef __NORMAL_MODE__
95	.h8300hn
96#else
97	.h8300h
98#endif
99#endif
100
101#ifdef __H8300S__
102#ifdef __NORMAL_MODE__
103	.h8300sn
104#else
105	.h8300s
106#endif
107#endif
108#ifdef __H8300SX__
109#ifdef __NORMAL_MODE__
110	.h8300sxn
111#else
112	.h8300sx
113#endif
114#endif
115
116#ifdef L_cmpsi2
117#ifdef __H8300__
; Signed 32-bit three-way comparison for the H8/300.
;
; In:   A0:A1 = first operand  (A0 compared as signed high word,
;               A1 as unsigned low word)
;       A2:A3 = second operand (same layout)
; Out:  A0 = 0 if first < second, 1 if equal, 2 if first > second
118	.section .text
119	.align 2
120	.global LABEL(cmpsi2)
121LABEL_DEF(cmpsi2)
122	cmp.w	A0,A2		; high words: flags reflect A2 - A0
123	bne	.L2		; high words differ, so they decide
124	cmp.w	A1,A3		; high words equal: flags reflect A3 - A1
125	bne	.L4
126	mov.w	#1,A0		; operands are equal
127	rts
128.L2:
129	bgt	.L5		; signed test: second high word is greater
130.L3:
131	mov.w	#2,A0		; first > second
132	rts
133.L4:
134	bls	.L3		; unsigned test: second low word is lower
135.L5:
136	sub.w	A0,A0		; first < second: return 0
137	rts
138	.end
139#endif
140#endif /* L_cmpsi2 */
141
142#ifdef L_ucmpsi2
143#ifdef __H8300__
; Unsigned 32-bit three-way comparison for the H8/300.
;
; In:   A0:A1 = first operand  (A0 high word, A1 low word)
;       A2:A3 = second operand (same layout)
; Out:  A0 = 0 if first < second, 1 if equal, 2 if first > second
;
; Identical to the signed variant except that the high words are
; tested with the unsigned condition bhi instead of bgt.
144	.section .text
145	.align 2
146	.global LABEL(ucmpsi2)
147LABEL_DEF(ucmpsi2)
148	cmp.w	A0,A2		; high words: flags reflect A2 - A0
149	bne	.L2		; high words differ, so they decide
150	cmp.w	A1,A3		; high words equal: flags reflect A3 - A1
151	bne	.L4
152	mov.w	#1,A0		; operands are equal
153	rts
154.L2:
155	bhi	.L5		; unsigned test: second high word is higher
156.L3:
157	mov.w	#2,A0		; first > second
158	rts
159.L4:
160	bls	.L3		; unsigned test: second low word is lower
161.L5:
162	sub.w	A0,A0		; first < second: return 0
163	rts
164	.end
165#endif
166#endif /* L_ucmpsi2 */
167
168#ifdef L_divhi3
169
170;; HImode divides for the H8/300.
171;; We bunch all of this into one object file since there are several
172;; "supporting routines".
173
174; general purpose normalize routine
175;
176; dividend in A0
177; divisor in A1
178; turns both into +ve numbers, and leaves what the answer sign
179; should be in bit 3 of A2L (set means the quotient is negative)
180
181#ifdef __H8300__
182	.section .text
183	.align 2
184divnorm:
185	or	A0H,A0H		; is dividend >= 0
186	stc	ccr,A2L		; keep the flags: N (bit 3) is the dividend sign
187	bge	_lab1
188	not	A0H		; no - then make it +ve
189	not	A0L
190	adds	#1,A0
191_lab1:	or	A1H,A1H	; look at divisor
192	bge	_lab2
193	not	A1H		; it is -ve, make it positive
194	not	A1L
195	adds	#1,A1
196	xor	#0x8,A2L; and toggle sign of result
197_lab2:	rts
198;; Basically the same, except that the saved sign is never toggled, so
199;; the sign of the dividend (A0) alone determines the sign of the result.
200modnorm:
201	or	A0H,A0H		; is dividend >= 0
202	stc	ccr,A2L		; keep the flags: N (bit 3) is the dividend sign
203	bge	_lab7
204	not	A0H		; no - then make it +ve
205	not	A0L
206	adds	#1,A0
207_lab7:	or	A1H,A1H	; look at divisor
208	bge	_lab8
209	not	A1H		; it is -ve, make it positive
210	not	A1L
211	adds	#1,A1
212_lab8:	rts
213
214; A0=A0/A1 signed
;
; Makes both operands positive with divnorm, divides the magnitudes
; with the unsigned routine and then negates the quotient when bit 3
; of A2L is set.  The negans tail is also the exit path of the signed
; modulo routine below.
215
216	.global	LABEL(divhi3)
217LABEL_DEF(divhi3)
218	bsr	divnorm
219	bsr	LABEL(udivhi3)
220negans:	btst	#3,A2L	; should answer be negative ?
221	beq	_lab4
222	not	A0H	; yes, so make it so
223	not	A0L
224	adds	#1,A0
225_lab4:	rts
226
227; A0=A0%A1 signed
;
; Makes both operands positive with modnorm (which records only the
; sign of the dividend), runs the unsigned divide, takes the remainder
; it leaves in A3 and lets negans apply the recorded sign.
228
229	.global	LABEL(modhi3)
230LABEL_DEF(modhi3)
231	bsr	modnorm
232	bsr	LABEL(udivhi3)
233	mov	A3,A0		; the remainder is the result
234	bra	negans
235
236; A0=A0%A1 unsigned
;
; Thin wrapper: the unsigned divide leaves the remainder in A3, which
; is simply moved into the result register.
237
238	.global	LABEL(umodhi3)
239LABEL_DEF(umodhi3)
240	bsr	LABEL(udivhi3)
241	mov	A3,A0		; the remainder is the result
242	rts
243
244; A0=A0/A1 unsigned
245; A3=A0%A1 unsigned
246; A2H trashed
247; D high 8 bits of denom
248; d low 8 bits of denom
249; N high 8 bits of num
250; n low 8 bits of num
251; M high 8 bits of mod
252; m low 8 bits of mod
253; Q high 8 bits of quot
254; q low 8 bits of quot
255; P preserve
256
257; The H8/300 only has a 16/8 bit divide, so we look at the incoming and
258; see how to partition up the expression.
;
; The column comments on the instructions below show the contents of
; A0 A1 A2 A3 (high byte then low byte) using the letters listed above.
; A2L is left untouched because the signed callers keep the result
; sign there.
259
260	.global	LABEL(udivhi3)
261LABEL_DEF(udivhi3)
262				; A0 A1 A2 A3
263				; Nn Dd       P
264	sub.w	A3,A3		; Nn Dd xP 00
265	or	A1H,A1H
266	bne	divlongway	; denominator needs more than 8 bits
267	or	A0H,A0H
268	beq	_lab6		; high byte of numerator is zero: skip a step
269
270; we know that D == 0 and N is != 0
271	mov.b	A0H,A3L		; Nn Dd xP 0N
272	divxu	A1L,A3		;          MQ
273	mov.b	A3L,A0H	 	; Q
274; dealt with N, do n
; (the remainder M left in the high byte of A3 carries into this step)
275_lab6:	mov.b	A0L,A3L		;           n
276	divxu	A1L,A3		;          mq
277	mov.b	A3L,A0L		; Qq
278	mov.b	A3H,A3L         ;           m
279	mov.b	#0x0,A3H	; Qq       0m
280	rts
281
282; D != 0 - which means the denominator is at least 256, too wide for the
283;          16/8 divide, so loop around to get the result.
;          The quotient then fits in 8 bits, hence the 8 iterations of
;          shift and trial subtract.
284
285divlongway:
286	mov.b	A0H,A3L		; Nn Dd xP 0N
287	mov.b	#0x0,A0H	; high byte of answer has to be zero
288	mov.b	#0x8,A2H	;       8
289div8:	add.b	A0L,A0L		; n*=2
290	rotxl	A3L		; Make remainder bigger
291	rotxl	A3H
292	sub.w	A1,A3		; remainder -= denominator
293	bhs	setbit		; set a bit ?
294	add.w	A1,A3		;  no : too far , remainder += denominator
295
296	dec	A2H
297	bne	div8		; next bit
298	rts
299
300setbit:	inc	A0L		; do insert bit
301	dec	A2H
302	bne	div8		; next bit
303	rts
304
305#endif /* __H8300__ */
306#endif /* L_divhi3 */
307
308#ifdef L_divsi3
309
310;; 4 byte integer divides for the H8/300.
311;;
312;; We have one routine which does all the work and lots of
313;; little ones which prepare the args and massage the sign.
314;; We bunch all of this into one object file since there are several
315;; "supporting routines".
316
317	.section .text
318	.align 2
319
320; Put abs SIs into r0/r1 and r2/r3, and leave the sign of the result in
; bit 3 of r6l (set means the quotient is negative).
321; This function is here to keep branch displacements small.
322
323#ifdef __H8300__
324
325divnorm:
326	mov.b	A0H,A0H		; is the numerator -ve
327	stc	ccr,S2L		; keep the sign in bit 3 of S2L
328	bge	postive
329
330	; negate arg
	; (complement all four bytes, then add 1 with carry propagation)
331	not	A0H
332	not	A1H
333	not	A0L
334	not	A1L
335
336	add	#1,A1L
337	addx	#0,A1H
338	addx	#0,A0L
339	addx	#0,A0H
340postive:
341	mov.b	A2H,A2H		; is the denominator -ve
342	bge	postive2
343	not	A2L
344	not	A2H
345	not	A3L
346	not	A3H
347	add.b	#1,A3L
348	addx	#0,A3H
349	addx	#0,A2L
350	addx	#0,A2H
351	xor.b	#0x08,S2L	; toggle the result sign
352postive2:
353	rts
354
355;; Basically the same, except that the saved sign is never toggled, so
356;; the sign of the numerator alone determines the sign of the result.
357modnorm:
358	mov.b	A0H,A0H		; is the numerator -ve
359	stc	ccr,S2L		; keep the sign in bit 3 of S2L
360	bge	mpostive
361
362	; negate arg
	; (complement all four bytes, then add 1 with carry propagation)
363	not	A0H
364	not	A1H
365	not	A0L
366	not	A1L
367
368	add	#1,A1L
369	addx	#0,A1H
370	addx	#0,A0L
371	addx	#0,A0H
372mpostive:
373	mov.b	A2H,A2H		; is the denominator -ve
374	bge	mpostive2
375	not	A2L
376	not	A2H
377	not	A3L
378	not	A3H
379	add.b	#1,A3L
380	addx	#0,A3H
381	addx	#0,A2L
382	addx	#0,A2H
383mpostive2:
384	rts
385
386#else /* __H8300H__ */
387
; 32-bit register variant of the normalize helper: numerator in A0P,
; denominator in A1P.  Both are replaced by their absolute values and
; bit 3 of S2L is left set when the quotient has to be negative.
388divnorm:
389	mov.l	A0P,A0P		; is the numerator -ve
390	stc	ccr,S2L		; keep the sign in bit 3 of S2L
391	bge	postive
392
393	neg.l	A0P		; negate arg
394
395postive:
396	mov.l	A1P,A1P		; is the denominator -ve
397	bge	postive2
398
399	neg.l	A1P		; negate arg
400	xor.b	#0x08,S2L	; toggle the result sign
401
402postive2:
403	rts
404
405;; Basically the same, except that the saved sign is never toggled, so
406;; the sign of the numerator alone determines the sign of the result.
407modnorm:
408	mov.l	A0P,A0P		; is the numerator -ve
409	stc	ccr,S2L		; keep the sign in bit 3 of S2L
410	bge	mpostive
411
412	neg.l	A0P		; negate arg
413
414mpostive:
415	mov.l	A1P,A1P		; is the denominator -ve
416	bge	mpostive2
417
418	neg.l	A1P		; negate arg
419
420mpostive2:
421	rts
422
423#endif
424
425; numerator in A0/A1
426; denominator in A2/A3
;
; Signed 32-bit remainder.  modnorm replaces both operands by their
; absolute values and records the sign of the numerator in bit 3 of
; S2L; the unsigned divide then produces the remainder, and the shared
; exitdiv tail negates it when that bit is set and restores the pushed
; registers.
;
; On the H8/300 the remainder comes back in S0/S1 from divmodsi4.  On
; the H8/300H and later it comes back in er3 from the unsigned divide.
; The operands are already normalized at that point, so the unsigned
; routine is called directly rather than the signed one, which would
; push S2P and normalize a second time for nothing.
427	.global	LABEL(modsi3)
428LABEL_DEF(modsi3)
429#ifdef __H8300__
430	PUSHP	S2P		; popped again by the exitdiv/reti tail
431	PUSHP	S0P
432	PUSHP	S1P
433	bsr	modnorm
434	bsr	divmodsi4
435	mov	S0,A0		; the remainder is the result
436	mov	S1,A1
437	bra	exitdiv
438#else
439	PUSHP	S2P		; popped again by the exitdiv/reti tail
440	bsr	modnorm
441	bsr	LABEL(udivsi3)	; operands are already absolute values
442	mov.l	er3,er0		; the remainder is the result
443	bra	exitdiv
444#endif
445
446	;; H8/300H and H8S version of ___udivsi3 is defined later in
447	;; the file.
448#ifdef __H8300__
; Unsigned 32-bit divide for the H8/300: A0/A1 = A0/A1 / A2/A3.
; Saves S2, S0 and S1 because divmodsi4 uses them, then leaves through
; the shared reti tail, which pops them again and returns.
449	.global	LABEL(udivsi3)
450LABEL_DEF(udivsi3)
451	PUSHP	S2P
452	PUSHP	S0P
453	PUSHP	S1P
454	bsr	divmodsi4
455	bra	reti
456#endif
457
; Unsigned 32-bit remainder.
; H8/300: divmodsi4 leaves the remainder in S0/S1; it is copied to
; A0/A1 and the shared reti tail restores the pushed registers.
; Otherwise: the unsigned divide leaves the remainder in er3, which is
; copied to er0.
458	.global	LABEL(umodsi3)
459LABEL_DEF(umodsi3)
460#ifdef __H8300__
461	PUSHP	S2P
462	PUSHP	S0P
463	PUSHP	S1P
464	bsr	divmodsi4
465	mov	S0,A0		; the remainder is the result
466	mov	S1,A1
467	bra	reti
468#else
469	bsr	LABEL(udivsi3)
470	mov.l	er3,er0		; the remainder is the result
471	rts
472#endif
473
; Signed 32-bit divide.  divnorm makes both operands positive and
; leaves the quotient sign in bit 3 of S2L; the unsigned divide is then
; run on the magnitudes and execution falls into exitdiv.
;
; exitdiv and reti are shared tails: the signed modulo routine branches
; to exitdiv, and the H8/300 unsigned divide and modulo routines branch
; to reti.  Every path that reaches them has pushed S2P (and, on the
; H8/300, S0P and S1P after it), which reti pops in reverse order.
474	.global	LABEL(divsi3)
475LABEL_DEF(divsi3)
476#ifdef __H8300__
477	PUSHP	S2P
478	PUSHP	S0P
479	PUSHP	S1P
480	jsr	divnorm
481	jsr	divmodsi4
482#else
483	PUSHP	S2P
484	jsr	divnorm
485	bsr	LABEL(udivsi3)
486#endif
487
488	; examine what the sign should be
489exitdiv:
490	btst	#3,S2L
491	beq	reti		; bit clear: result stays positive
492
493	; should be -ve
494#ifdef __H8300__
	; (complement all four bytes, then add 1 with carry propagation)
495	not	A0H
496	not	A1H
497	not	A0L
498	not	A1L
499
500	add	#1,A1L
501	addx	#0,A1H
502	addx	#0,A0L
503	addx	#0,A0H
504#else /* __H8300H__ */
505	neg.l	A0P
506#endif
507
508reti:
509#ifdef __H8300__
510	POPP	S1P
511	POPP	S0P
512#endif
513	POPP	S2P
514	rts
515
516	; takes A0/A1 numerator (A0P for H8/300H)
517	; A2/A3 denominator (A1P for H8/300H)
518	; returns A0/A1 quotient (A0P for H8/300H)
519	; S0/S1 remainder (er3 for H8/300H)
520	; trashes S2H
521
522#ifdef __H8300__
523
; Unsigned 32-bit divide and modulo core for the H8/300.
;
; In:   A0/A1 = numerator, A2/A3 = denominator
; Out:  A0/A1 = quotient, S0/S1 = remainder
; Uses S2H as scratch and as the loop counter; S2L is left alone
; because the signed callers keep the result sign there.
;
; When the denominator fits in 8 bits the numerator is divided one
; byte at a time with divxu, starting at its first non-zero byte.
; Despite their names, the NumByte0Zero..NumByte2Zero entry points are
; taken when that byte is NOT zero.  Otherwise a 24-step shift and
; subtract loop is used.
524divmodsi4:
525        sub.w	S0,S0		; zero play area
526        mov.w	S0,S1
527        mov.b	A2H,S2H
528        or	A2L,S2H
529        or	A3H,S2H
530        bne	DenHighNonZero
531        mov.b	A0H,A0H
532        bne	NumByte0Zero
533        mov.b	A0L,A0L
534        bne	NumByte1Zero
535        mov.b	A1H,A1H
536        bne	NumByte2Zero
537        bra	NumByte3Zero
; Each step divides (previous remainder : next numerator byte) by A3L;
; divxu leaves the quotient byte in S1L and the remainder in S1H.
538NumByte0Zero:
539	mov.b	A0H,S1L
540        divxu	A3L,S1
541        mov.b	S1L,A0H
542NumByte1Zero:
543	mov.b	A0L,S1L
544        divxu	A3L,S1
545        mov.b	S1L,A0L
546NumByte2Zero:
547	mov.b	A1H,S1L
548        divxu	A3L,S1
549        mov.b	S1L,A1H
550NumByte3Zero:
551	mov.b	A1L,S1L
552        divxu	A3L,S1
553        mov.b	S1L,A1L
554
; move the final remainder byte down so that S0/S1 hold the remainder
555        mov.b	S1H,S1L
556        mov.b	#0x0,S1H
557        rts
558
559; have to do the divide by shift and test
; The denominator is at least 256 here, so the top numerator byte can
; be moved straight into the remainder and only the remaining 24 bits
; need a loop iteration each.
560DenHighNonZero:
561	mov.b	A0H,S1L
562        mov.b	A0L,A0H
563        mov.b	A1H,A0L
564        mov.b	A1L,A1H
565
566        mov.b	#0,A1L
567        mov.b	#24,S2H	; only do 24 iterations
568
569nextbit:
570	add.w	A1,A1	; double the answer guess
571        rotxl	A0L
572        rotxl	A0H
573
574        rotxl	S1L	; double remainder
575        rotxl	S1H
576        rotxl	S0L
577        rotxl	S0H
578        sub.w	A3,S1	; does it all fit
579        subx	A2L,S0L
580        subx	A2H,S0H
581        bhs	setone
582
583        add.w	A3,S1	; no, restore mistake
584        addx	A2L,S0L
585        addx	A2H,S0H
586
587        dec	S2H
588        bne	nextbit
589        rts
590
591setone:
592	inc	A1L	; it fitted: record a 1 bit in the quotient
593        dec	S2H
594        bne	nextbit
595        rts
596
597#else /* __H8300H__ */
598
; Unsigned 32-bit divide for the H8/300H and later.
;
; In:   er0 = numerator, er1 = denominator
; Out:  er0 = quotient, er3 = remainder
; er1 and er2 are used as scratch.
599	;; This function also computes the remainder and stores it in er3.
600	.global	LABEL(udivsi3)
601LABEL_DEF(udivsi3)
602	mov.w	A1E,A1E		; denominator top word 0?
603	bne	DenHighNonZero
604
605	; do it the easy way, see page 107 in manual
	; (two 32/16 divides: first the high word of the numerator, then
	;  that remainder combined with the low word)
606	mov.w	A0E,A2
607	extu.l	A2P
608	divxu.w	A1,A2P
609	mov.w	A2E,A0E
610	divxu.w	A1,A0P
611	mov.w	A0E,A3
612	mov.w	A2,A0E
613	extu.l	A3P
614	rts
615
616 	; er0 = er0 / er1
617 	; er3 = er0 % er1
618 	; trashes er1 er2
619 	; expects er1 >= 2^16
	;
	; Numerator and denominator are shifted right by the same amount
	; until the denominator fits in 16 bits, a 32/16 divide gives an
	; approximate quotient, and the remainder computed from it is
	; used to correct the quotient.
620DenHighNonZero:
621	mov.l	er0,er3
622	mov.l	er1,er2
623#ifdef __H8300H__
624divmod_L21:
625	shlr.l	er0
626	shlr.l	er2		; make divisor < 2^16
627	mov.w	e2,e2
628	bne	divmod_L21
629#else
	; same idea, but shifting two bits at a time
630	shlr.l	#2,er2		; make divisor < 2^16
631	mov.w	e2,e2
632	beq	divmod_L22A
633divmod_L21:
634	shlr.l	#2,er0
635divmod_L22:
636	shlr.l	#2,er2		; make divisor < 2^16
637	mov.w	e2,e2
638	bne	divmod_L21
639divmod_L22A:
640	rotxl.w	r2
641	bcs	divmod_L23
642	shlr.l	er0
643	bra	divmod_L24
644divmod_L23:
645	rotxr.w	r2
646	shlr.l	#2,er0
647divmod_L24:
648#endif
649	;; At this point,
650	;;  er0 contains shifted dividend
651	;;  er1 contains divisor
652	;;  er2 contains shifted divisor
653	;;  er3 contains dividend, later remainder
654	divxu.w	r2,er0		; r0 now contains the approximate quotient (AQ)
655	extu.l	er0
656	beq	divmod_L25
657	subs	#1,er0		; er0 = AQ - 1
658	mov.w	e1,r2
659	mulxu.w	r0,er2		; er2 = upper (AQ - 1) * divisor
660	sub.w	r2,e3		; dividend - 65536 * er2
661	mov.w	r1,r2
662	mulxu.w	r0,er2		; compute er3 = remainder (tentative)
663	sub.l	er2,er3		; er3 = dividend - (AQ - 1) * divisor
664divmod_L25:
665 	cmp.l	er1,er3		; is remainder >= divisor?
666	blo	divmod_L26	; no: quotient and remainder are final
667 	adds	#1,er0
668	sub.l	er1,er3		; correct the remainder
669divmod_L26:
670	rts
671
672#endif
673#endif /* L_divsi3 */
674
675#ifdef L_mulhi3
676
677;; HImode multiply.
678; The H8/300 only has an 8*8->16 multiply.
679; The answer is the same as:
680;
681; product = (srca.l * srcb.l) + ((srca.h * srcb.l) + (srcb.h * srca.l)) * 256
682; (we can ignore A1.h * A0.h because that will all fall off the top)
683; A0 in
684; A1 in
685; A0 answer
; A2 and A3 are used as scratch.
686
687#ifdef __H8300__
688	.section .text
689	.align 2
690	.global	LABEL(mulhi3)
691LABEL_DEF(mulhi3)
692	mov.b	A1L,A2L		; A2l gets srcb.l
693	mulxu	A0L,A2		; A2 gets first sub product
694
695	mov.b	A0H,A3L		; prepare for
696	mulxu	A1L,A3		; second sub product
697
698	add.b	A3L,A2H		; sum first two terms
				; (only the low byte of a cross product
				;  survives the multiplication by 256)
699
700	mov.b	A1H,A3L		; third sub product
701	mulxu	A0L,A3
702
703	add.b	A3L,A2H		; almost there
704	mov.w	A2,A0		; that is
705	rts
706
707#endif
708#endif /* L_mulhi3 */
709
710#ifdef L_mulsi3
711
712;; SImode multiply.
713;;
714;; I think that shift and add may be sufficient for this.  Using the
715;; supplied 8x8->16 would need 10 ops of 14 cycles each + overhead.  This way
716;; the inner loop uses maybe 20 cycles + overhead, but terminates
717;; quickly on small args.
718;;
719;; A0/A1 src_a
720;; A2/A3 src_b
721;;
722;;  while (a)
723;;    {
724;;      if (a & 1)
725;;        r += b;
726;;      a >>= 1;
727;;      b <<= 1;
728;;    }
;;
;; The running result r is kept in S0/S1, which are pushed on entry and
;; popped again before returning; the product is returned in A0/A1.
729
730	.section .text
731	.align 2
732
733#ifdef __H8300__
734
735	.global	LABEL(mulsi3)
736LABEL_DEF(mulsi3)
737	PUSHP	S0P
738	PUSHP	S1P
739
740	sub.w	S0,S0		; r = 0
741	sub.w	S1,S1
742
743	; while (a)
744_top:	mov.w	A0,A0
745	bne	_more
746	mov.w	A1,A1
747	beq	_done
748_more:	; if (a & 1)
749	bld	#0,A1L
750	bcc	_nobit
751	; r += b
752	add.w	A3,S1
753	addx	A2L,S0L
754	addx	A2H,S0H
755_nobit:
756	; a >>= 1
757	shlr	A0H
758	rotxr	A0L
759	rotxr	A1H
760	rotxr	A1L
761
762	; b <<= 1
763	add.w	A3,A3
764	addx	A2L,A2L
765	addx	A2H,A2H
766	bra 	_top
767
768_done:
769	mov.w	S0,A0		; return r
770	mov.w	S1,A1
771	POPP	S1P
772	POPP	S0P
773	rts
774
775#else /* __H8300H__ */
776
777;
778; mulsi3 for H8/300H - based on Renesas SH implementation
779;
780; by Toshiyasu Morita
781;
782; Old code:
783;
784; 16b * 16b = 372 states (worst case)
785; 32b * 32b = 724 states (worst case)
786;
787; New code:
788;
789; 16b * 16b =  48 states
790; 16b * 32b =  72 states
791; 32b * 32b =  92 states
792;
; Operands: er0 = a:b (e0 = a, r0 = b) and er1 = c:d (e1 = c, r1 = d).
; The result, returned in er0, is b*d plus the low 16 bits of a*d and
; of c*b added into its upper word.  A cross product is skipped when
; its high-word operand is zero.  er2 and er3 are used as scratch.
;
793
794	.global LABEL(mulsi3)
795LABEL_DEF(mulsi3)
796	mov.w	r1,r2   ; ( 2 states) b * d
797	mulxu	r0,er2  ; (22 states)
798
799	mov.w	e0,r3   ; ( 2 states) a * d
800	beq	L_skip1 ; ( 4 states)
801	mulxu	r1,er3  ; (22 states)
802	add.w	r3,e2   ; ( 2 states)
803
804L_skip1:
805	mov.w	e1,r3   ; ( 2 states) c * b
806	beq	L_skip2 ; ( 4 states)
807	mulxu	r0,er3  ; (22 states)
808	add.w	r3,e2   ; ( 2 states)
809
810L_skip2:
811	mov.l	er2,er0	; ( 2 states)
812	rts		; (10 states)
813
814#endif
815#endif /* L_mulsi3 */
816#ifdef L_fixunssfsi_asm
817/* For the h8300 we use asm to save some bytes, to
818   allow more programs to fit into the tiny address
819   space.  For the H8/300H and H8S, the C version is good enough.  */
820#ifdef __H8300__
821/* We still treat NANs different than libgcc2.c, but then, the
822   behavior is undefined anyways.  */
/* Convert the value in A0/A1 to an unsigned 32-bit integer in A0/A1.
   Inputs whose top byte is below 0x4f as a signed byte are handed to
   fixsfsi.  A top byte above 0x4f, or equal to 0x4f with bit 7 of the
   second byte set, gives 0xffffffff.  A top byte of exactly 0x4f with
   that bit clear has the bit set and the lower three bytes shifted up
   by one byte.
   NOTE(review): this presumably relies on the IEEE single-precision
   layout (sign and exponent in the top nine bits, so the shifted case
   covers 2^31 <= x < 2^32) - confirm.  */
823	.global LABEL(fixunssfsi)
824LABEL_DEF(fixunssfsi)
825	cmp.b #0x4f,r0h
826	bge Large_num
827	jmp     @LABEL(fixsfsi)
828Large_num:
829	bhi L_huge_num		; top byte above 0x4f
830	xor.b #0x80,A0L		; flip bit 7 of the second byte
831	bmi L_shift8		; it was clear and is now set
832L_huge_num:
833	mov.w #65535,A0		; return 0xffffffff
834	mov.w A0,A1
835	rts
836L_shift8:
837	mov.b A0L,A0H		; move the lower three bytes up by one
838	mov.b A1H,A0L
839	mov.b A1L,A1H
840	mov.b #0,A1L
841	rts
842#endif
843#endif /* L_fixunssfsi_asm */
844