config/alpha/lib1funcs.asm

*404b540aSrobert/* DEC Alpha division and remainder support.
*404b540aSrobert   Copyright (C) 1994, 1999 Free Software Foundation, Inc.
*404b540aSrobert
*404b540aSrobertThis file is free software; you can redistribute it and/or modify it
*404b540aSrobertunder the terms of the GNU General Public License as published by the
*404b540aSrobertFree Software Foundation; either version 2, or (at your option) any
*404b540aSrobertlater version.
*404b540aSrobert
*404b540aSrobertIn addition to the permissions in the GNU General Public License, the
*404b540aSrobertFree Software Foundation gives you unlimited permission to link the
*404b540aSrobertcompiled version of this file into combinations with other programs,
*404b540aSrobertand to distribute those combinations without any restriction coming
*404b540aSrobertfrom the use of this file.  (The General Public License restrictions
*404b540aSrobertdo apply in other respects; for example, they cover modification of
*404b540aSrobertthe file, and distribution when not linked into a combine
*404b540aSrobertexecutable.)
*404b540aSrobert
*404b540aSrobertThis file is distributed in the hope that it will be useful, but
*404b540aSrobertWITHOUT ANY WARRANTY; without even the implied warranty of
*404b540aSrobertMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
*404b540aSrobertGeneral Public License for more details.
*404b540aSrobert
*404b540aSrobertYou should have received a copy of the GNU General Public License
*404b540aSrobertalong with this program; see the file COPYING.  If not, write to
*404b540aSrobertthe Free Software Foundation, 51 Franklin Street, Fifth Floor,
*404b540aSrobertBoston, MA 02110-1301, USA.  */
*404b540aSrobert
*404b540aSrobert/* This had to be written in assembler because the division functions
*404b540aSrobert   use a non-standard calling convention.
*404b540aSrobert
*404b540aSrobert   This file provides an implementation of __divqu, __divq, __divlu,
*404b540aSrobert   __divl, __remqu, __remq, __remlu and __reml.  CPP macros control
*404b540aSrobert   the exact operation.
*404b540aSrobert
*404b540aSrobert   Operation performed: $27 := $24 o $25, clobber $28, return address to
*404b540aSrobert   caller in $23, where o is one of the operations.
*404b540aSrobert
*404b540aSrobert   The following macros need to be defined:
*404b540aSrobert
*404b540aSrobert	SIZE, the number of bits, 32 or 64.
*404b540aSrobert
*404b540aSrobert	TYPE, either UNSIGNED or SIGNED
*404b540aSrobert
*404b540aSrobert	OPERATION, either DIVISION or REMAINDER
*404b540aSrobert
*404b540aSrobert	SPECIAL_CALLING_CONVENTION, 0 or 1.  It is useful for debugging to
*404b540aSrobert	define this to 0.  That removes the `__' prefix to make the function
*404b540aSrobert	name not collide with the existing libc.a names, and uses the
*404b540aSrobert	standard Alpha procedure calling convention.
*404b540aSrobert*/
*404b540aSrobert/* Use the special calling convention unless SPECIAL_CALLING_CONVENTION
*404b540aSrobert   has already been defined.  */
*404b540aSrobert#ifndef SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define SPECIAL_CALLING_CONVENTION 1
*404b540aSrobert#endif
*404b540aSrobert/* Each L_<name> section below configures one routine: FUNCTION_NAME is
*404b540aSrobert   the exported symbol (with the `__' prefix only under the special
*404b540aSrobert   calling convention), SIZE the operand width in bits, TYPE the
*404b540aSrobert   signedness and OPERATION the result wanted.  The values SIGNED,
*404b540aSrobert   UNSIGNED, DIVISION and REMAINDER are themselves macros defined
*404b540aSrobert   further down; they are only expanded where SIZE/TYPE/OPERATION are
*404b540aSrobert   tested.  Presumably exactly one L_<name> is defined per compilation
*404b540aSrobert   -- TODO confirm against the build rules.
*404b540aSrobert
*404b540aSrobert   divl: signed 32-bit division.  */
*404b540aSrobert#ifdef L_divl
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __divl
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME divl
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 32
*404b540aSrobert#define TYPE SIGNED
*404b540aSrobert#define OPERATION DIVISION
*404b540aSrobert#endif
*404b540aSrobert/* divlu: unsigned 32-bit division.  */
*404b540aSrobert#ifdef L_divlu
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __divlu
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME divlu
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 32
*404b540aSrobert#define TYPE UNSIGNED
*404b540aSrobert#define OPERATION DIVISION
*404b540aSrobert#endif
*404b540aSrobert/* divq: signed 64-bit division.  */
*404b540aSrobert#ifdef L_divq
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __divq
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME divq
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 64
*404b540aSrobert#define TYPE SIGNED
*404b540aSrobert#define OPERATION DIVISION
*404b540aSrobert#endif
*404b540aSrobert/* divqu: unsigned 64-bit division.  */
*404b540aSrobert#ifdef L_divqu
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __divqu
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME divqu
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 64
*404b540aSrobert#define TYPE UNSIGNED
*404b540aSrobert#define OPERATION DIVISION
*404b540aSrobert#endif
*404b540aSrobert/* reml: signed 32-bit remainder.  */
*404b540aSrobert#ifdef L_reml
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __reml
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME reml
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 32
*404b540aSrobert#define TYPE SIGNED
*404b540aSrobert#define OPERATION REMAINDER
*404b540aSrobert#endif
*404b540aSrobert/* remlu: unsigned 32-bit remainder.  */
*404b540aSrobert#ifdef L_remlu
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __remlu
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME remlu
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 32
*404b540aSrobert#define TYPE UNSIGNED
*404b540aSrobert#define OPERATION REMAINDER
*404b540aSrobert#endif
*404b540aSrobert/* remq: signed 64-bit remainder.  */
*404b540aSrobert#ifdef L_remq
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __remq
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME remq
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 64
*404b540aSrobert#define TYPE SIGNED
*404b540aSrobert#define OPERATION REMAINDER
*404b540aSrobert#endif
*404b540aSrobert/* remqu: unsigned 64-bit remainder.  */
*404b540aSrobert#ifdef L_remqu
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define FUNCTION_NAME __remqu
*404b540aSrobert#else
*404b540aSrobert#define FUNCTION_NAME remqu
*404b540aSrobert#endif
*404b540aSrobert#define SIZE 64
*404b540aSrobert#define TYPE UNSIGNED
*404b540aSrobert#define OPERATION REMAINDER
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert/* Working registers used by the routine body.  cnt, result_sign and tmp0
*404b540aSrobert   are spilled and reloaded by the routine under the special calling
*404b540aSrobert   convention; tmp1 is $28, which that convention allows to be clobbered.  */
*404b540aSrobert#define cnt $1
*404b540aSrobert#define result_sign $2
*404b540aSrobert#define tmp0 $3
*404b540aSrobert#define tmp1 $28
*404b540aSrobert
*404b540aSrobert/* Operand and result registers.  RETREG depends on the calling
*404b540aSrobert   convention; the quotient accumulator Q is always an alias for it.  */
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#define RETREG $27
*404b540aSrobert#define N $24
*404b540aSrobert#define D $25
*404b540aSrobert#else
*404b540aSrobert#define RETREG $0
*404b540aSrobert#define N $16
*404b540aSrobert#define D $17
*404b540aSrobert#endif
*404b540aSrobert#define Q RETREG
*404b540aSrobert
*404b540aSrobert/* Readable names for two fixed-purpose Alpha registers.  */
*404b540aSrobert#define sp $30
*404b540aSrobert#define zero $31
*404b540aSrobert
*404b540aSrobert/* Numeric values behind the TYPE and OPERATION settings, so that they
*404b540aSrobert   can be compared in #if expressions.  */
*404b540aSrobert#define DIVISION 0
*404b540aSrobert#define REMAINDER 1
*404b540aSrobert#define UNSIGNED 0
*404b540aSrobert#define SIGNED 1
*404b540aSrobert
*404b540aSrobert/* FUNCTION_NAME -- integer division or remainder by shift-and-subtract.
*404b540aSrobert
*404b540aSrobert   In:   N = dividend, D = divisor.
*404b540aSrobert   Out:  RETREG = N / D when OPERATION is DIVISION, N % D when it is
*404b540aSrobert	 REMAINDER.
*404b540aSrobert
*404b540aSrobert   Under the special calling convention the return address is in $23 and
*404b540aSrobert   every register the body modifies, apart from RETREG and tmp1 ($28),
*404b540aSrobert   is saved on entry and reloaded before returning.  Under the standard
*404b540aSrobert   convention nothing is saved and the return address is in $26.
*404b540aSrobert
*404b540aSrobert   A zero divisor raises a trap (call_pal 0xaa with $16 = -2).  If
*404b540aSrobert   execution resumes after the trap, the routine returns 0.  */
*404b540aSrobert	.set noreorder
*404b540aSrobert	.set noat
*404b540aSrobert.text
*404b540aSrobert	.align 3
*404b540aSrobert	.globl FUNCTION_NAME
*404b540aSrobert	.ent FUNCTION_NAME
*404b540aSrobertFUNCTION_NAME:
*404b540aSrobert
*404b540aSrobert/* NOTE(review): this declares a zero-size frame returning through $26,
*404b540aSrobert   while the special-convention body below allocates 64 bytes and returns
*404b540aSrobert   through $23 -- confirm whether any consumer relies on this descriptor.  */
*404b540aSrobert	.frame	$30,0,$26,0
*404b540aSrobert	.prologue 0
*404b540aSrobert
*404b540aSrobert/* Under the special calling convention, we have to preserve all register
*404b540aSrobert   values but $23 and $28.  Allocate a 64-byte save area and spill the
*404b540aSrobert   registers the body modifies.  N only needs saving for division: for
*404b540aSrobert   remainder it is copied to RETREG below and the copy is modified.  */
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert	lda	sp,-64(sp)
*404b540aSrobert#if OPERATION == DIVISION
*404b540aSrobert	stq	N,0(sp)
*404b540aSrobert#endif
*404b540aSrobert	stq	D,8(sp)
*404b540aSrobert	stq	cnt,16(sp)
*404b540aSrobert	stq	result_sign,24(sp)
*404b540aSrobert	stq	tmp0,32(sp)
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert/* If we are computing the remainder, move N to the register that is used
*404b540aSrobert   for the return value, and redefine what register is used for N.  The
*404b540aSrobert   divide loop reduces N in place, so the remainder ends up in RETREG.  */
*404b540aSrobert#if OPERATION == REMAINDER
*404b540aSrobert	bis	N,N,RETREG
*404b540aSrobert#undef N
*404b540aSrobert#define N RETREG
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert/* Perform conversion from 32 bit types to 64 bit types.  */
*404b540aSrobert#if SIZE == 32
*404b540aSrobert#if TYPE == SIGNED
*404b540aSrobert	/* If there are problems with the signed case, add these instructions.
*404b540aSrobert	   The caller should already have done this.
*404b540aSrobert	addl	N,0,N		# sign extend N
*404b540aSrobert	addl	D,0,D		# sign extend D
*404b540aSrobert	*/
*404b540aSrobert#else /* UNSIGNED */
*404b540aSrobert	zap	N,0xf0,N	# zero extend N (caller required to sign extend)
*404b540aSrobert	zap	D,0xf0,D	# zero extend D
*404b540aSrobert#endif
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert/* Check for divide by zero.  A zero divisor raises a trap with code -2
*404b540aSrobert   passed in $16.  $16 is not a register we are allowed to clobber under
*404b540aSrobert   the special calling convention, so its value is parked in tmp1 ($28)
*404b540aSrobert   across the trap.  If execution resumes after the trap, the shift loops
*404b540aSrobert   below could never terminate with D == 0 (D + D stays 0), so return a
*404b540aSrobert   zero result through the normal exit path instead.  The sign fix-up at
*404b540aSrobert   $46 leaves a zero result unchanged whatever result_sign holds.  */
*404b540aSrobert	bne	D,$34
*404b540aSrobert	bis	$16,$16,tmp1
*404b540aSrobert	lda	$16,-2(zero)
*404b540aSrobert	call_pal 0xaa
*404b540aSrobert	bis	tmp1,tmp1,$16
*404b540aSrobert	bis	zero,zero,RETREG
*404b540aSrobert	br	zero,$46
*404b540aSrobert$34:
*404b540aSrobert
*404b540aSrobert#if TYPE == SIGNED
*404b540aSrobert/* Record the sign the result must take.  For division, N xor D is negative
*404b540aSrobert   exactly when the operand signs differ; for remainder the result follows
*404b540aSrobert   the sign of the dividend.  */
*404b540aSrobert#if OPERATION == DIVISION
*404b540aSrobert	xor	N,D,result_sign
*404b540aSrobert#else
*404b540aSrobert	bis	N,N,result_sign
*404b540aSrobert#endif
*404b540aSrobert/* Get the absolute values of N and D.  */
*404b540aSrobert	subq	zero,N,tmp0
*404b540aSrobert	cmovlt	N,tmp0,N
*404b540aSrobert	subq	zero,D,tmp0
*404b540aSrobert	cmovlt	D,tmp0,D
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert/* Compute CNT = ceil(log2(N)) - ceil(log2(D)).  This is the number of
*404b540aSrobert   divide iterations we will have to perform.  Should you wish to optimize
*404b540aSrobert   this, check a few bits at a time, preferably using zap/zapnot.  Be
*404b540aSrobert   careful though, this code runs fast for the most common cases, when the
*404b540aSrobert   quotient is small.
*404b540aSrobert
*404b540aSrobert   D is shifted left here until it lines up with N; the divide loop then
*404b540aSrobert   shifts it back down one bit per iteration.  */
*404b540aSrobert	bge	N,$35
*404b540aSrobert/* N has bit 63 set: shift D left until its bit 63 is set as well.  */
*404b540aSrobert	bis	zero,1,cnt
*404b540aSrobert	blt	D,$40
*404b540aSrobert	.align	3
*404b540aSrobert$39:	addq	D,D,D
*404b540aSrobert	addl	cnt,1,cnt
*404b540aSrobert	bge	D,$39
*404b540aSrobert	br	zero,$40
*404b540aSrobert/* N has bit 63 clear: shift D left until it exceeds N (unsigned compare),
*404b540aSrobert   then back it off by one bit at $42.  If D already exceeds N, no
*404b540aSrobert   iterations are needed and CNT ends up negative.  */
*404b540aSrobert$35:	cmpult	N,D,tmp0
*404b540aSrobert	bis	zero,zero,cnt
*404b540aSrobert	bne	tmp0,$42
*404b540aSrobert	.align	3
*404b540aSrobert$44:	addq	D,D,D
*404b540aSrobert	cmpult	N,D,tmp0
*404b540aSrobert	addl	cnt,1,cnt
*404b540aSrobert	beq	tmp0,$44
*404b540aSrobert$42:	srl	D,1,D
*404b540aSrobert$40:
*404b540aSrobert	subl	cnt,1,cnt
*404b540aSrobert
*404b540aSrobert
*404b540aSrobert/* Actual divide.  Could be optimized with unrolling.  Each iteration
*404b540aSrobert   subtracts D from N when D <= N (unsigned), shifts that comparison bit
*404b540aSrobert   into Q for division, and halves D.  The loop runs CNT + 1 times.  */
*404b540aSrobert#if OPERATION == DIVISION
*404b540aSrobert	bis	zero,zero,Q
*404b540aSrobert#endif
*404b540aSrobert	blt	cnt,$46
*404b540aSrobert	.align	3
*404b540aSrobert$49:	cmpule	D,N,tmp1
*404b540aSrobert	subq	N,D,tmp0
*404b540aSrobert	srl	D,1,D
*404b540aSrobert	subl	cnt,1,cnt
*404b540aSrobert	cmovne	tmp1,tmp0,N
*404b540aSrobert#if OPERATION == DIVISION
*404b540aSrobert	addq	Q,Q,Q
*404b540aSrobert	bis	Q,tmp1,Q
*404b540aSrobert#endif
*404b540aSrobert	bge	cnt,$49
*404b540aSrobert$46:
*404b540aSrobert
*404b540aSrobert
*404b540aSrobert/* The result is now in RETREG.  NOTE!  It was written to RETREG using
*404b540aSrobert   either N or Q as a synonym!  */
*404b540aSrobert
*404b540aSrobert
*404b540aSrobert/* Change the sign of the result as needed.  */
*404b540aSrobert#if TYPE == SIGNED
*404b540aSrobert	subq	zero,RETREG,tmp0
*404b540aSrobert	cmovlt	result_sign,tmp0,RETREG
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert
*404b540aSrobert/* Restore clobbered registers.  */
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert#if OPERATION == DIVISION
*404b540aSrobert	ldq	N,0(sp)
*404b540aSrobert#endif
*404b540aSrobert	ldq	D,8(sp)
*404b540aSrobert	ldq	cnt,16(sp)
*404b540aSrobert	ldq	result_sign,24(sp)
*404b540aSrobert	ldq	tmp0,32(sp)
*404b540aSrobert
*404b540aSrobert	lda	sp,64(sp)
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert
*404b540aSrobert/* Sign extend an *unsigned* 32 bit result, as required by the Alpha
*404b540aSrobert   conventions.  */
*404b540aSrobert#if TYPE == UNSIGNED && SIZE == 32
*404b540aSrobert	/* This could be avoided by adding some CPP hair to the divide loop.
*404b540aSrobert	   It is probably not worth the added complexity.  */
*404b540aSrobert	addl	RETREG,0,RETREG
*404b540aSrobert#endif
*404b540aSrobert
*404b540aSrobert
*404b540aSrobert/* Return through the register that holds the return address for the
*404b540aSrobert   calling convention in use.  */
*404b540aSrobert#if SPECIAL_CALLING_CONVENTION
*404b540aSrobert	ret	zero,($23),1
*404b540aSrobert#else
*404b540aSrobert	ret	zero,($26),1
*404b540aSrobert#endif
*404b540aSrobert	.end	FUNCTION_NAME