492 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
			
		
		
	
	
			492 lines
		
	
	
		
			9.6 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
| //===----------------------Hexagon builtin routine ------------------------===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| // Double Precision Divide
 | |
| 
 | |
| #define A r1:0	// first operand (the dividend); the result is also produced in this pair
 | |
| #define AH r1
 | |
| #define AL r0
 | |
| 
 | |
| #define B r3:2	// second operand (the divisor); later aliased as DENOM
 | |
| #define BH r3
 | |
| #define BL r2
 | |
| 
 | |
| #define Q r5:4	// 64-bit integer quotient accumulator
 | |
| #define QH r5
 | |
| #define QL r4
 | |
| 
 | |
| #define PROD r7:6	// scratch pair for products / partial quotients
 | |
| #define PRODHI r7
 | |
| #define PRODLO r6
 | |
| 
 | |
| #define SFONE r8	// single-precision working registers for the reciprocal estimate
 | |
| #define SFDEN r9
 | |
| #define SFERROR r10
 | |
| #define SFRECIP r11
 | |
| 
 | |
| #define EXPBA r13:12	// pair holding EXPB (high word) and EXPA (low word)
 | |
| #define EXPB r13
 | |
| #define EXPA r12
 | |
| 
 | |
| #define REMSUB2 r15:14
 | |
| 
 | |
| 
 | |
| 
 | |
| #define SIGN r28	// xor of the operands' high words; r28 is later reused as ONE
 | |
| 
 | |
| #define Q_POSITIVE p3	// set when SIGN is non-negative, i.e. the operand signs match
 | |
| #define NORMAL p2	// set when both operands are classified normal
 | |
| #define NO_OVF_UNF p1	// set when the exponent difference is inside the range tested on the fast path
 | |
| #define P_TMP p0
 | |
| 
 | |
| #define RECIPEST_SHIFT 3	// bit position at which the reciprocal fraction is inserted into RECIPEST
 | |
| #define QADJ 61	// subtracted from the exponent difference before it is added to the converted result
 | |
| 
 | |
| #define DFCLASS_NORMAL 0x02	// class masks passed to dfclass
 | |
| #define DFCLASS_NUMBER 0x0F
 | |
| #define DFCLASS_INFINITE 0x08
 | |
| #define DFCLASS_ZERO 0x01
 | |
| #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
 | |
| #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
 | |
| 
 | |
| #define DF_MANTBITS 52
 | |
| #define DF_EXPBITS 11
 | |
| #define SF_MANTBITS 23
 | |
| #define SF_EXPBITS 8
 | |
| #define DF_BIAS 0x3ff
 | |
| 
 | |
| #define SR_ROUND_OFF 22	// offset of the 2-bit USR field read in .Ldiv_ovf to pick the overflow result
 | |
| 
 | |
| #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG	// export __qdsp_TAG as an alias of __hexagon_TAG
 | |
| #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG	// export __hexagon_fast_TAG alias
 | |
| #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG	// export __hexagon_fast2_TAG alias
 | |
| #define END(TAG) .size TAG,.-TAG	// record the symbol size as (current location - TAG)
 | |
| 
 | |
| 	.text
 | |
| 	.global __hexagon_divdf3
 | |
| 	.type __hexagon_divdf3,@function
 | |
| 	Q6_ALIAS(divdf3)
 | |
|         FAST_ALIAS(divdf3)
 | |
|         FAST2_ALIAS(divdf3)
 | |
| 	.p2align 5
 | |
| __hexagon_divdf3:	// double-precision divide: A (r1:0) / B (r3:2), result left in A, returns via r31
 | |
| 	{
 | |
| 		NORMAL = dfclass(A,#DFCLASS_NORMAL)	// NORMAL is written twice in this packet; same-packet predicate writes are ANDed,
 | |
| 		NORMAL = dfclass(B,#DFCLASS_NORMAL)	// so NORMAL ends up true only when both A and B are normal
 | |
| 		EXPBA = combine(BH,AH)	// EXPB = BH, EXPA = AH (raw high words; exponent fields are extracted below)
 | |
| 		SIGN = xor(AH,BH)	// bit 31 is set when the operand signs differ
 | |
| 	}
 | |
| #undef A
 | |
| #undef AH
 | |
| #undef AL
 | |
| #undef B
 | |
| #undef BH
 | |
| #undef BL
 | |
| #define REM r1:0	// from here the dividend registers hold the running remainder
 | |
| #define REMHI r1
 | |
| #define REMLO r0
 | |
| #define DENOM r3:2	// and the divisor registers hold the denominator significand
 | |
| #define DENOMHI r3
 | |
| #define DENOMLO r2
 | |
| 	{
 | |
| 		if (!NORMAL) jump .Ldiv_abnormal	// zero/inf/NaN/denormal operands are handled out of line
 | |
| 		PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)	// top 23 fraction bits of the divisor
 | |
| 		SFONE = ##0x3f800001	// single-precision 1.0 with the lsb set; .Ldiv_abnormal also uses SFONE without reloading it
 | |
| 	}
 | |
| 	{
 | |
| 		SFDEN = or(SFONE,PRODLO)	// single-precision value in [1,2) built from the divisor's leading fraction bits
 | |
| 		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)	// 11-bit exponent field of B
 | |
| 		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)	// 11-bit exponent field of A
 | |
| 		Q_POSITIVE = cmp.gt(SIGN,#-1)	// true when SIGN >= 0, i.e. the result is non-negative
 | |
| 	}
 | |
| #undef SIGN
 | |
| #define ONE r28	// r28 is reused to hold the constant 1
 | |
| .Ldenorm_continue:	// .Ldiv_abnormal re-enters here after normalising denormal operands
 | |
| 	{
 | |
| 		SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)	// initial reciprocal estimate for SFDEN
 | |
| 		SFERROR = and(SFONE,#-2)	// SFONE with the lsb cleared, i.e. 0x3f800000 (1.0f)
 | |
| 		ONE = #1
 | |
| 		EXPA = sub(EXPA,EXPB)	// exponent difference A - B
 | |
| 	}
 | |
| #undef EXPB
 | |
| #define RECIPEST r13	// r13 (formerly EXPB) now holds the integer reciprocal estimate
 | |
| 	{
 | |
| 		SFERROR -= sfmpy(SFRECIP,SFDEN):lib	// error = 1.0 - recip*den
 | |
| 		REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)	// overwrite bits 31:20 with 0x001: clears sign/exponent, sets the hidden bit
 | |
| 		RECIPEST = ##0x00800000 << RECIPEST_SHIFT	// bit 23 (just above the 23 fraction bits) shifted up by RECIPEST_SHIFT
 | |
| 	}
 | |
| 	{
 | |
| 		SFRECIP += sfmpy(SFRECIP,SFERROR):lib	// first refinement: recip += recip*error
 | |
| 		DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)	// same sign/exponent replacement for the divisor
 | |
| 		SFERROR = and(SFONE,#-2)	// reload 1.0f for the second refinement
 | |
| 	}
 | |
| 	{
 | |
| 		SFERROR -= sfmpy(SFRECIP,SFDEN):lib	// error = 1.0 - recip*den again
 | |
| 		QH = #-DF_BIAS+1	// lower bound used by the range test below
 | |
| 		QL = #DF_BIAS-1	// upper bound used by the range test below
 | |
| 	}
 | |
| 	{
 | |
| 		SFRECIP += sfmpy(SFRECIP,SFERROR):lib	// second refinement
 | |
| 		NO_OVF_UNF = cmp.gt(EXPA,QH)	// two writes to one predicate are ANDed:
 | |
| 		NO_OVF_UNF = !cmp.gt(EXPA,QL)	// NO_OVF_UNF = (EXPA > -DF_BIAS+1) && !(EXPA > DF_BIAS-1)
 | |
| 	}
 | |
| 	{
 | |
| 		RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)	// low 23 bits of the refined reciprocal placed at bit RECIPEST_SHIFT
 | |
| 		Q = #0	// clear the quotient accumulator
 | |
| 		EXPA = add(EXPA,#-QADJ)	// EXPA becomes the amount added to the exponent field after conversion
 | |
| 	}
 | |
| #undef SFERROR
 | |
| #undef SFRECIP
 | |
| #define TMP r10
 | |
| #define TMP1 r11
 | |
| 	{
 | |
| 		RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))	// lower the estimate by 3 units at the fraction lsb; presumably keeps it an underestimate - TODO confirm
 | |
| 	}
 | |
| // DIV_ITER1B: one refinement step. PRODHI = high word of RECIPEST*REMHI; REM is shifted left by REMSHIFT and reduced by PRODHI*DENOM; PRODHI:0 shifted by QSHIFT is added to Q; EXTRA is appended to the last packet.
 | |
| #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
 | |
| 	{ \
 | |
| 		PROD = mpyu(RECIPEST,REMHI); \
 | |
| 		REM = asl(REM,# ## ( REMSHIFT )); \
 | |
| 	}; \
 | |
| 	{ \
 | |
| 		PRODLO = # ## 0; \
 | |
| 		REM -= mpyu(PRODHI,DENOMLO); \
 | |
| 		REMSUB2 = mpyu(PRODHI,DENOMHI); \
 | |
| 	}; \
 | |
| 	{ \
 | |
| 		Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
 | |
| 		REM -= asl(REMSUB2, # ## 32); \
 | |
| 		EXTRA \
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	DIV_ITER1B(ASL,14,15,)	// four steps; each shifts REM left by 15 and adds its partial quotient at a lower position in Q
 | |
| 	DIV_ITER1B(ASR,1,15,)
 | |
| 	DIV_ITER1B(ASR,16,15,)
 | |
| 	DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)	// the last step also zeroes PROD for the fix-up below
 | |
| 
 | |
| #undef REMSUB2
 | |
| #define TMPPAIR r15:14
 | |
| #define TMPPAIRHI r15
 | |
| #define TMPPAIRLO r14
 | |
| #undef RECIPEST
 | |
| #define EXPB r13
 | |
| 	{
 | |
| 		// compare or sub with carry
 | |
| 		TMPPAIR = sub(REM,DENOM)
 | |
| 		P_TMP = cmp.gtu(DENOM,REM)	// true when the remainder is already smaller than the divisor
 | |
| 		// set up amt to add to q
 | |
| 		if (!P_TMP.new) PRODLO  = #2
 | |
| 	}
 | |
| 	{
 | |
| 		Q = add(Q,PROD)	// PROD is 0, or 2 when one more divisor still fits in the remainder
 | |
| 		if (!P_TMP) REM = TMPPAIR	// in that case keep the reduced remainder
 | |
| 		TMPPAIR = #0
 | |
| 	}
 | |
| 	{
 | |
| 		P_TMP = cmp.eq(REM,TMPPAIR)	// is the remainder exactly zero?
 | |
| 		if (!P_TMP.new) QL = or(QL,ONE)	// nonzero remainder: force the lsb of Q to 1
 | |
| 	}
 | |
| 	{
 | |
| 		PROD = neg(Q)
 | |
| 	}
 | |
| 	{
 | |
| 		if (!Q_POSITIVE) Q = PROD	// use the negated quotient when the operand signs differ
 | |
| 	}
 | |
| #undef REM
 | |
| #undef REMHI
 | |
| #undef REMLO
 | |
| #undef DENOM
 | |
| #undef DENOMLO
 | |
| #undef DENOMHI
 | |
| #define A r1:0
 | |
| #define AH r1
 | |
| #define AL r0
 | |
| #define B r3:2
 | |
| #define BH r3
 | |
| #define BL r2
 | |
| 	{
 | |
| 		A = convert_d2df(Q)	// signed 64-bit integer quotient -> double
 | |
| 		if (!NO_OVF_UNF) jump .Ldiv_ovf_unf	// exponent difference outside the range tested above: take the slow path
 | |
| 	}
 | |
| 	{
 | |
| 		AH += asl(EXPA,#DF_MANTBITS-32)	// add EXPA into the exponent field (bit 20 of the high word)
 | |
| 		jumpr r31
 | |
| 	}
 | |
| 
 | |
| .Ldiv_ovf_unf:	// slow path: the exponent difference was outside the range tested on the fast path
 | |
| 	{
 | |
| 		AH += asl(EXPA,#DF_MANTBITS-32)	// same exponent adjustment as the fast path (this A is what .Lpossible_unf returns)
 | |
| 		EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)	// exponent field of A; same packet, so it reads AH from before the += above
 | |
| 	}
 | |
| 	{
 | |
| 		PROD = abs(Q)
 | |
| 		EXPA = add(EXPA,EXPB)	// adjustment + converted exponent = exponent value the result would need
 | |
| 	}
 | |
| 	{
 | |
| 		P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)		// overflow
 | |
| 		if (P_TMP.new) jump:nt .Ldiv_ovf
 | |
| 	}
 | |
| 	{
 | |
| 		P_TMP = cmp.gt(EXPA,#0)
 | |
| 		if (P_TMP.new) jump:nt .Lpossible_unf		// round up to normal possible...
 | |
| 	}
 | |
| 	// Underflow
 | |
| 	// We know what the infinite range exponent should be (EXPA)
 | |
| 	// Q is 2's complement, PROD is abs(Q)
 | |
| 	// Normalize Q, shift right, add a high bit, convert, change exponent
 | |
| 
 | |
| #define FUDGE1 7	// how much to shift right
 | |
| #define FUDGE2 4	// how many guard/round to keep at lsbs
 | |
| 
 | |
| 	{
 | |
| 		EXPB = add(clb(PROD),#-1)			// doesn't need to be added in since
 | |
| 		EXPA = sub(#FUDGE1,EXPA)			// we extract post-converted exponent
 | |
| 		TMP = USR	// read the user status register
 | |
| 		TMP1 = #63
 | |
| 	}
 | |
| 	{
 | |
| 		EXPB = min(EXPA,TMP1)	// right-shift amount, capped at 63
 | |
| 		TMP1 = or(TMP,#0x030)	// USR value with bits 0x30 set (Unf+Inexact per the comments in .Lpossible_unf)
 | |
| 		PROD = asl(PROD,EXPB)	// normalise; uses the clb-based count from the previous packet, not the min() above
 | |
| 		EXPA = #0	// offset 0 for the extractu below
 | |
| 	}
 | |
| 	{
 | |
| 		TMPPAIR = extractu(PROD,EXPBA)				// bits that will get shifted out
 | |
| 		PROD = lsr(PROD,EXPB)					// shift out bits
 | |
| 		B = #1
 | |
| 	}
 | |
| 	{
 | |
| 		P_TMP = cmp.gtu(B,TMPPAIR)	// true only when no nonzero bits were shifted out
 | |
| 		if (!P_TMP.new) PRODLO = or(BL,PRODLO)	// otherwise OR a 1 into the lsb
 | |
| 		PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)	// set bit DF_MANTBITS+FUDGE2 of PROD (the "high bit" from the note above)
 | |
| 	}
 | |
| 	{
 | |
| 		Q = neg(PROD)
 | |
| 		P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)	// are the low FUDGE2 bits all zero?
 | |
| 		if (!P_TMP.new) TMP = TMP1	// if not, use the USR value with the flag bits set
 | |
| 	}
 | |
| 	{
 | |
| 		USR = TMP	// write USR back (unchanged when the low bits were zero)
 | |
| 		if (Q_POSITIVE) Q = PROD	// Q = +PROD or -PROD according to the result sign
 | |
| 		TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)	// exponent adjustment applied after the conversion
 | |
| 	}
 | |
| 	{
 | |
| 		A = convert_d2df(Q)
 | |
| 	}
 | |
| 	{
 | |
| 		AH += asl(TMP,#DF_MANTBITS-32)	// add the adjustment into the exponent field
 | |
| 		jumpr r31
 | |
| 	}
 | |
| 
 | |
| 
 | |
| .Lpossible_unf:	// A already holds the value to return; only the status flags may still need updating
 | |
| 	// If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
 | |
| 	// The answer is correct, but we need to raise Underflow
 | |
| 	{
 | |
| 		B = extractu(A,#63,#0)	// A with the sign bit dropped
 | |
| 		TMPPAIR = combine(##0x00100000,#0)		// min normal
 | |
| 		TMP = #0x7FFF
 | |
| 	}
 | |
| 	{
 | |
| 		P_TMP = dfcmp.eq(TMPPAIR,B)		// Is everything zero in the rounded value...
 | |
| 		P_TMP = bitsset(PRODHI,TMP)		// but a bunch of bits set in the unrounded abs(quotient)?
 | |
| 	}
 | |
| 
 | |
| #if (__HEXAGON_ARCH__ == 60)
 | |
| 		TMP = USR		// If not, just return
 | |
| 		if (!P_TMP) jumpr r31   // Else, we want to set Unf+Inexact
 | |
| 					// Note that inexact is already set...
 | |
| #else
 | |
| 	{
 | |
| 		if (!P_TMP) jumpr r31			// If not, just return
 | |
| 		TMP = USR				// Else, we want to set Unf+Inexact
 | |
| 	}						// Note that inexact is already set...
 | |
| #endif
 | |
| 	{
 | |
| 		TMP = or(TMP,#0x30)	// set USR bits 0x30
 | |
| 	}
 | |
| 	{
 | |
| 		USR = TMP
 | |
| 	}
 | |
| 	{
 | |
| 		p0 = dfcmp.eq(A,A)	// compare result is unused; presumably issued so the newly set flags take effect - TODO confirm
 | |
| 		jumpr r31
 | |
| 	}
 | |
| 
 | |
| .Ldiv_ovf:	// result exponent too large: return signed infinity or the signed largest finite value
 | |
| 
 | |
| 	// Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
 | |
| 
 | |
| 	{
 | |
| 		TMP = USR
 | |
| 		B = combine(##0x7fefffff,#-1)	// 0x7fefffff_ffffffff: largest finite double
 | |
| 		AH = mux(Q_POSITIVE,#0,#-1)	// AH = 0 for a positive result, all ones for a negative one (supplies the sign bit)
 | |
| 	}
 | |
| 	{
 | |
| 		PROD = combine(##0x7ff00000,#0)	// 0x7ff00000_00000000: +infinity
 | |
| 		QH = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit USR field at SR_ROUND_OFF (the rounding mode, per the comments below)
 | |
| 		TMP = or(TMP,#0x28)	// set USR bits 0x28 (presumably overflow + inexact - TODO confirm bit names)
 | |
| 	}
 | |
| 	{
 | |
| 		USR = TMP
 | |
| 		QH ^= lsr(AH,#31)	// flip bit 0 of the mode when the result is negative
 | |
| 		QL = QH	// same packet: QL receives the mode as read from USR, before the xor
 | |
| 	}
 | |
| 	{
 | |
| 		p0 = !cmp.eq(QL,#1)		// if not round-to-zero
 | |
| 		p0 = !cmp.eq(QH,#2)		// and not rounding the other way
 | |
| 		if (p0.new) B = PROD		// go to inf
 | |
| 		p0 = dfcmp.eq(B,B)		// get exceptions
 | |
| 	}
 | |
| 	{
 | |
| 		A = insert(B,#63,#0)	// low 63 bits from B, sign bit kept from AH
 | |
| 		jumpr r31
 | |
| 	}
 | |
| 
 | |
| #undef ONE
 | |
| #define SIGN r28
 | |
| #undef NORMAL
 | |
| #undef NO_OVF_UNF
 | |
| #define P_INF p1
 | |
| #define P_ZERO p2
 | |
| .Ldiv_abnormal:	// reached from the entry when A and B are not both normal; SIGN (r28) still holds AH^BH
 | |
| 	{
 | |
| 		P_TMP = dfclass(A,#DFCLASS_NUMBER)	// two writes to one predicate are ANDed:
 | |
| 		P_TMP = dfclass(B,#DFCLASS_NUMBER)	// P_TMP = both operands are in the NUMBER class
 | |
| 		Q_POSITIVE = cmp.gt(SIGN,#-1)	// the entry path jumped here before computing this
 | |
| 	}
 | |
| 	{
 | |
| 		P_INF = dfclass(A,#DFCLASS_INFINITE)	// P_INF = both operands infinite
 | |
| 		P_INF = dfclass(B,#DFCLASS_INFINITE)
 | |
| 	}
 | |
| 	{
 | |
| 		P_ZERO = dfclass(A,#DFCLASS_ZERO)	// P_ZERO = both operands zero
 | |
| 		P_ZERO = dfclass(B,#DFCLASS_ZERO)
 | |
| 	}
 | |
| 	{
 | |
| 		if (!P_TMP) jump .Ldiv_nan	// at least one operand is not a number
 | |
| 		if (P_INF) jump .Ldiv_invalid	// inf / inf
 | |
| 	}
 | |
| 	{
 | |
| 		if (P_ZERO) jump .Ldiv_invalid	// 0 / 0
 | |
| 	}
 | |
| 	{
 | |
| 		P_ZERO = dfclass(A,#DFCLASS_NONZERO)		// nonzero
 | |
| 		P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)	// non-infinite
 | |
| 	}
 | |
| 	{
 | |
| 		P_INF = dfclass(A,#DFCLASS_NONINFINITE)	// non-infinite
 | |
| 		P_INF = dfclass(B,#DFCLASS_NONZERO)	// nonzero
 | |
| 	}
 | |
| 	{
 | |
| 		if (!P_ZERO) jump .Ldiv_zero_result	// A is zero or B is infinite
 | |
| 		if (!P_INF) jump .Ldiv_inf_result	// A is infinite or B is zero
 | |
| 	}
 | |
| 	// Now we've narrowed it down to (de)normal / (de)normal
 | |
| 	// Set up A/EXPA B/EXPB and go back
 | |
| #undef P_ZERO
 | |
| #undef P_INF
 | |
| #define P_TMP2 p1
 | |
| 	{
 | |
| 		P_TMP = dfclass(A,#DFCLASS_NORMAL)	// P_TMP: A is normal
 | |
| 		P_TMP2 = dfclass(B,#DFCLASS_NORMAL)	// P_TMP2: B is normal
 | |
| 		TMP = ##0x00100000	// hidden-bit position within the high word
 | |
| 	}
 | |
| 	{
 | |
| 		EXPBA = combine(BH,AH)	// keep the original high words for the exponent extraction below
 | |
| 		AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
 | |
| 		BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
 | |
| 	}
 | |
| 	{
 | |
| 		if (P_TMP) AH = or(AH,TMP)				// if normal, add back in hidden bit
 | |
| 		if (P_TMP2) BH = or(BH,TMP)				// if normal, add back in hidden bit
 | |
| 	}
 | |
| 	{
 | |
| 		QH = add(clb(A),#-DF_EXPBITS)	// left shift that brings A's leading bit up to the hidden-bit position
 | |
| 		QL = add(clb(B),#-DF_EXPBITS)	// same for B
 | |
| 		TMP = #1
 | |
| 	}
 | |
| 	{
 | |
| 		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)	// 11-bit exponent field of A
 | |
| 		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)	// 11-bit exponent field of B
 | |
| 	}
 | |
| 	{
 | |
| 		A = asl(A,QH)	// normalise the significands
 | |
| 		B = asl(B,QL)
 | |
| 		if (!P_TMP) EXPA = sub(TMP,QH)	// A not normal: exponent = 1 - shift count
 | |
| 		if (!P_TMP2) EXPB = sub(TMP,QL)	// B not normal: exponent = 1 - shift count
 | |
| 	}	// recreate values needed by resume code
 | |
| 	{
 | |
| 		PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)	// top 23 fraction bits of the normalised divisor
 | |
| 	}
 | |
| 	{
 | |
| 		SFDEN = or(SFONE,PRODLO)	// SFONE was loaded in the packet that jumped here
 | |
| 		jump .Ldenorm_continue
 | |
| 	}
 | |
| 
 | |
| .Ldiv_zero_result:	// taken when A is zero or B is infinite: return a zero carrying the xor of the operand signs
 | |
| 	{
 | |
| 		AH = xor(AH,BH)	// bit 31 = sign of the result
 | |
| 		B = #0
 | |
| 	}
 | |
| 	{
 | |
| 		A = insert(B,#63,#0)	// clear the low 63 bits of A, keeping only the sign bit
 | |
| 		jumpr r31
 | |
| 	}
 | |
| .Ldiv_inf_result:	// taken when A is infinite or B is zero: return an infinity carrying the xor of the operand signs
 | |
| 	{
 | |
| 		p2 = dfclass(B,#DFCLASS_ZERO)	// two writes to one predicate are ANDed:
 | |
| 		p2 = dfclass(A,#DFCLASS_NONINFINITE)	// p2 = B is zero and A is not infinite
 | |
| 	}
 | |
| 	{
 | |
| 		TMP = USR
 | |
| 		if (!p2) jump 1f	// skip the flag update when p2 is false
 | |
| 		AH = xor(AH,BH)	// bit 31 = sign of the result (same packet as the jump, so done on both paths)
 | |
| 	}
 | |
| 	{
 | |
| 		TMP = or(TMP,#0x04)		// DBZ
 | |
| 	}
 | |
| 	{
 | |
| 		USR = TMP
 | |
| 	}
 | |
| 1:
 | |
| 	{
 | |
| 		B = combine(##0x7ff00000,#0)	// 0x7ff00000_00000000: +infinity
 | |
| 		p0 = dfcmp.uo(B,B)		// take possible exception
 | |
| 	}
 | |
| 	{
 | |
| 		A = insert(B,#63,#0)	// low 63 bits from the infinity pattern, sign bit kept from AH
 | |
| 		jumpr r31
 | |
| 	}
 | |
| .Ldiv_nan:	// taken when A and B are not both in the NUMBER class
 | |
| 	{
 | |
| 		p0 = dfclass(A,#0x10)	// class mask 0x10: the one bit not covered by DFCLASS_NUMBER (presumably NaN - TODO confirm)
 | |
| 		p1 = dfclass(B,#0x10)
 | |
| 		if (!p0.new) A = B	// replace an operand that is not in class 0x10 with the other one
 | |
| 		if (!p1.new) B = A
 | |
| 	}
 | |
| 	{
 | |
| 		QH = convert_df2sf(A)	// get possible invalid exceptions
 | |
| 		QL = convert_df2sf(B)
 | |
| 	}
 | |
| 	{
 | |
| 		A = #-1	// return the all-ones bit pattern
 | |
| 		jumpr r31
 | |
| 	}
 | |
| 
 | |
| .Ldiv_invalid:	// taken for inf/inf and 0/0
 | |
| 	{
 | |
| 		TMP = ##0x7f800001	// single-precision pattern with all exponent bits set and a nonzero fraction
 | |
| 	}
 | |
| 	{
 | |
| 		A = convert_sf2df(TMP)		// get invalid, get DF qNaN
 | |
| 		jumpr r31
 | |
| 	}
 | |
| END(__hexagon_divdf3)
 |