This commit is contained in:
Xu Liangyu 2025-07-30 15:56:26 +02:00 committed by GitHub
commit 9c2c6dc3c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 216 additions and 117 deletions

View File

@ -82,14 +82,8 @@ C_FUNC(\Name\()_End):
// NOTE: reg1 and reg2 must be bare GPR numbers (e.g. 22 for $r22), not register names.
.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs, __def_cfa_save=0
//#ifdef FEATURE_LOONGSONISA
// //NOTE:The offset of gssq/gslq must be 16-bytes aligned.
// // here ofs must be 16-bytes aligned.
// gssq \reg2, \reg1, \ofs(sp)
//#else
st.d $r\reg1, $sp, \ofs
st.d $r\reg2, $sp, \ofs+8
//#endif
.cfi_rel_offset \reg1, \ofs
.cfi_rel_offset \reg2, \ofs + 8
@ -123,23 +117,15 @@ C_FUNC(\Name\()_End):
.endm
// Reloads a pair of general-purpose registers from the stack frame and
// records both as restored in the CFI unwind information.
//   reg1, reg2 - bare GPR numbers (expanded below as $r<number>)
//   ofs        - byte offset from $sp; reg1 is read from ofs, reg2 from ofs+8
// This macro does not modify $sp.
.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs
//#ifdef FEATURE_LOONGSONISA
// gslq \reg2, \reg1, \ofs(sp)
//#else
ld.d $r\reg2, $sp, \ofs+8
ld.d $r\reg1, $sp, \ofs
//#endif
.cfi_restore \reg2
.cfi_restore \reg1
.endm
.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ssize
//#ifdef FEATURE_LOONGSONISA
// gslq \reg2, \reg1, 0(sp)
//#else
ld.d $r\reg2, $sp, 8
ld.d $r\reg1, $sp, 0
//#endif
.cfi_restore \reg2
.cfi_restore \reg1
@ -183,14 +169,6 @@ C_FUNC(\Name\()_End):
// Reserve 64 bytes of memory before calling SAVE_ARGUMENT_REGISTERS
.macro SAVE_ARGUMENT_REGISTERS reg, ofs
//#ifdef FEATURE_LOONGSONISA
// //NOTE:The offset of gssq/gslq must be 16-bytes aligned.
// // here ofs must be 16-bytes aligned.
// gssq a1, a0, \ofs(\reg)
// gssq a3, a2, \ofs+16(\reg)
// gssq a5, a4, \ofs+32(\reg)
// gssq a7, a6, \ofs+48(\reg)
//#else
st.d $a0, \reg, \ofs
st.d $a1, \reg, \ofs+8
st.d $a2, \reg, \ofs+16
@ -199,21 +177,12 @@ C_FUNC(\Name\()_End):
st.d $a5, \reg, \ofs+40
st.d $a6, \reg, \ofs+48
st.d $a7, \reg, \ofs+56
//#endif
.endm
// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs
//#ifdef FEATURE_LOONGSONISA
// //NOTE:The offset of gssqc1/gslqc1 must be 16-bytes aligned.
// // here ofs must be 16-bytes aligned.
// gssqc1 $f13, $f12, \ofs(\reg)
// gssqc1 $f15, $f14, \ofs+16(\reg)
// gssqc1 $f17, $f16, \ofs+32(\reg)
// gssqc1 $f19, $f18, \ofs+48(\reg)
//#else
fst.d $f0, \reg, \ofs
fst.d $f1, \reg, \ofs+8
fst.d $f2, \reg, \ofs+16
@ -222,21 +191,12 @@ C_FUNC(\Name\()_End):
fst.d $f5, \reg, \ofs+40
fst.d $f6, \reg, \ofs+48
fst.d $f7, \reg, \ofs+56
//#endif
.endm
// Reserve 64 bytes of memory before calling SAVE_FLOAT_CALLEESAVED_REGISTERS
.macro SAVE_FLOAT_CALLEESAVED_REGISTERS reg, ofs
//#ifdef FEATURE_LOONGSONISA
// //NOTE:The offset of gssqc1/gslqc1 must be 16-bytes aligned.
// // here ofs must be 16-bytes aligned.
// gssqc1 $f25, $f24, \ofs(\reg)
// gssqc1 $f27, $f26, \ofs+16(\reg)
// gssqc1 $f29, $f28, \ofs+32(\reg)
// gssqc1 $f31, $f30, \ofs+48(\reg)
//#else
fst.d $f24, \reg, \ofs
fst.d $f25, \reg, \ofs+8
fst.d $f26, \reg, \ofs+16
@ -245,7 +205,6 @@ C_FUNC(\Name\()_End):
fst.d $f29, \reg, \ofs+40
fst.d $f30, \reg, \ofs+48
fst.d $f31, \reg, \ofs+56
//#endif
.endm
@ -260,14 +219,6 @@ C_FUNC(\Name\()_End):
.macro RESTORE_ARGUMENT_REGISTERS reg, ofs
//#ifdef FEATURE_LOONGSONISA
// //NOTE:The offset of gssq/gslq must be 16-bytes aligned.
// // here ofs must be 16-bytes aligned.
// gslq a7, a6, \ofs+48(\reg)
// gslq a5, a4, \ofs+32(\reg)
// gslq a3, a2, \ofs+16(\reg)
// gslq a1, a0, \ofs(\reg)
//#else
ld.d $a7, \reg, \ofs+56
ld.d $a6, \reg, \ofs+48
ld.d $a5, \reg, \ofs+40
@ -276,18 +227,11 @@ C_FUNC(\Name\()_End):
ld.d $a2, \reg, \ofs+16
ld.d $a1, \reg, \ofs+8
ld.d $a0, \reg, \ofs
//#endif
.endm
.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs
//#ifdef FEATURE_LOONGSONISA
// gslqc1 $f19, $f18, \ofs+48(\reg)
// gslqc1 $f17, $f16, \ofs+32(\reg)
// gslqc1 $f15, $f14, \ofs+16(\reg)
// gslqc1 $f13, $f12, \ofs(\reg)
//#else
fld.d $f7, \reg, \ofs+56
fld.d $f6, \reg, \ofs+48
fld.d $f5, \reg, \ofs+40
@ -296,20 +240,11 @@ C_FUNC(\Name\()_End):
fld.d $f2, \reg, \ofs+16
fld.d $f1, \reg, \ofs+8
fld.d $f0, \reg, \ofs
//#endif
.endm
.macro RESTORE_FLOAT_CALLEESAVED_REGISTERS reg, ofs
//#ifdef FEATURE_LOONGSONISA
// //NOTE:The offset of gssqc1/gslqc1 must be 16-bytes aligned.
// // here ofs must be 16-bytes aligned.
// gslqc1 $f25, $f24, \ofs(\reg)
// gslqc1 $f27, $f26, \ofs+16(\reg)
// gslqc1 $f29, $f28, \ofs+32(\reg)
// gslqc1 $f31, $f30, \ofs+48(\reg)
//#else
fld.d $f24, $r\reg, \ofs
fld.d $f25, $r\reg, \ofs+8
fld.d $f26, $r\reg, \ofs+16
@ -318,7 +253,6 @@ C_FUNC(\Name\()_End):
fld.d $f29, $r\reg, \ofs+40
fld.d $f30, $r\reg, \ofs+48
fld.d $f31, $r\reg, \ofs+56
//#endif
.endm

View File

@ -12,6 +12,9 @@
#define CONTEXT_FLOATING_POINT_BIT (2)
#define CONTEXT_DEBUG_REGISTERS_BIT (3)
// NOTE: unlike the CONTEXT_*_BIT values above (bit indices that are shifted
// with `1 << ...` where they are used), these two are ready-made masks: the
// context save/restore assembly ANDs them directly with the value returned by
// minipal_getcpufeatures().
#define CONTEXT_HWCAP_LOONGARCH_LSX_BIT (1 << 4)
#define CONTEXT_HWCAP_LOONGARCH_LASX_BIT (1 << 5)
#define CONTEXT_CONTROL (CONTEXT_LOONGARCH64 | (1 << CONTEXT_CONTROL_BIT))
#define CONTEXT_INTEGER (CONTEXT_LOONGARCH64 | (1 << CONTEXT_INTEGER_BIT))
#define CONTEXT_FLOATING_POINT (CONTEXT_LOONGARCH64 | (1 << CONTEXT_FLOATING_POINT_BIT))

View File

@ -16,12 +16,12 @@
LEAF_ENTRY RtlRestoreContext, _TEXT
#ifdef HAS_ADDRESS_SANITIZER
ld.w $r21, $a0, CONTEXT_ContextFlags
andi $r21, $r21, (1 << CONTEXT_CONTROL_BIT)
beq $r21, $r0, LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT)
andi $r21, $r21, (1 << CONTEXT_CONTROL_BIT)
beqz $r21, LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT)
addi.d $sp, $sp, -16
st.d a0, $sp, 0
st.d a1, $sp, 8
st.d $a0, $sp, 0
st.d $a1, $sp, 8
bl __asan_handle_no_return
@ -37,17 +37,103 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT):
andi $t1, $r21, (1 << CONTEXT_FLOATING_POINT_BIT)
beqz $t1, LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT)
PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 32
st.d $a0, $fp, 16
st.d $a1, $fp, 24
bl C_FUNC(minipal_getcpufeatures)
ori $t1, $a0, 0
ld.d $a0, $fp, 16
ld.d $a1, $fp, 24
EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
andi $t3, $t1, CONTEXT_HWCAP_LOONGARCH_LASX_BIT
bnez $t3, LOCAL_LABEL(Restore_CONTEXT_LASX)
andi $t3, $t1, CONTEXT_HWCAP_LOONGARCH_LSX_BIT
bnez $t3, LOCAL_LABEL(Restore_CONTEXT_LSX)
// Neither LSX nor LASX is supported.
fld.d $f0 , $a0, CONTEXT_FPU_OFFSET
fld.d $f1 , $a0, CONTEXT_FPU_OFFSET + 8*1
fld.d $f2 , $a0, CONTEXT_FPU_OFFSET + 8*2
fld.d $f3 , $a0, CONTEXT_FPU_OFFSET + 8*3
fld.d $f4 , $a0, CONTEXT_FPU_OFFSET + 8*4
fld.d $f5 , $a0, CONTEXT_FPU_OFFSET + 8*5
fld.d $f6 , $a0, CONTEXT_FPU_OFFSET + 8*6
fld.d $f7 , $a0, CONTEXT_FPU_OFFSET + 8*7
fld.d $f8 , $a0, CONTEXT_FPU_OFFSET + 8*8
fld.d $f9 , $a0, CONTEXT_FPU_OFFSET + 8*9
fld.d $f10, $a0, CONTEXT_FPU_OFFSET + 8*10
fld.d $f11, $a0, CONTEXT_FPU_OFFSET + 8*11
fld.d $f12, $a0, CONTEXT_FPU_OFFSET + 8*12
fld.d $f13, $a0, CONTEXT_FPU_OFFSET + 8*13
fld.d $f14, $a0, CONTEXT_FPU_OFFSET + 8*14
fld.d $f15, $a0, CONTEXT_FPU_OFFSET + 8*15
fld.d $f16, $a0, CONTEXT_FPU_OFFSET + 8*16
fld.d $f17, $a0, CONTEXT_FPU_OFFSET + 8*17
fld.d $f18, $a0, CONTEXT_FPU_OFFSET + 8*18
fld.d $f19, $a0, CONTEXT_FPU_OFFSET + 8*19
fld.d $f20, $a0, CONTEXT_FPU_OFFSET + 8*20
fld.d $f21, $a0, CONTEXT_FPU_OFFSET + 8*21
fld.d $f22, $a0, CONTEXT_FPU_OFFSET + 8*22
fld.d $f23, $a0, CONTEXT_FPU_OFFSET + 8*23
fld.d $f24, $a0, CONTEXT_FPU_OFFSET + 8*24
fld.d $f25, $a0, CONTEXT_FPU_OFFSET + 8*25
fld.d $f26, $a0, CONTEXT_FPU_OFFSET + 8*26
fld.d $f27, $a0, CONTEXT_FPU_OFFSET + 8*27
fld.d $f28, $a0, CONTEXT_FPU_OFFSET + 8*28
fld.d $f29, $a0, CONTEXT_FPU_OFFSET + 8*29
fld.d $f30, $a0, CONTEXT_FPU_OFFSET + 8*30
fld.d $f31, $a0, CONTEXT_FPU_OFFSET + 8*31
b LOCAL_LABEL(Restore_CONTEXT_FLOATING_CONTROL)
LOCAL_LABEL(Restore_CONTEXT_LSX):
// 128-bit SIMD: LSX.
vld $vr0 , $a0, CONTEXT_FPU_OFFSET
vld $vr1 , $a0, CONTEXT_FPU_OFFSET + 16*1
vld $vr2 , $a0, CONTEXT_FPU_OFFSET + 16*2
vld $vr3 , $a0, CONTEXT_FPU_OFFSET + 16*3
vld $vr4 , $a0, CONTEXT_FPU_OFFSET + 16*4
vld $vr5 , $a0, CONTEXT_FPU_OFFSET + 16*5
vld $vr6 , $a0, CONTEXT_FPU_OFFSET + 16*6
vld $vr7 , $a0, CONTEXT_FPU_OFFSET + 16*7
vld $vr8 , $a0, CONTEXT_FPU_OFFSET + 16*8
vld $vr9 , $a0, CONTEXT_FPU_OFFSET + 16*9
vld $vr10, $a0, CONTEXT_FPU_OFFSET + 16*10
vld $vr11, $a0, CONTEXT_FPU_OFFSET + 16*11
vld $vr12, $a0, CONTEXT_FPU_OFFSET + 16*12
vld $vr13, $a0, CONTEXT_FPU_OFFSET + 16*13
vld $vr14, $a0, CONTEXT_FPU_OFFSET + 16*14
vld $vr15, $a0, CONTEXT_FPU_OFFSET + 16*15
vld $vr16, $a0, CONTEXT_FPU_OFFSET + 16*16
vld $vr17, $a0, CONTEXT_FPU_OFFSET + 16*17
vld $vr18, $a0, CONTEXT_FPU_OFFSET + 16*18
vld $vr19, $a0, CONTEXT_FPU_OFFSET + 16*19
vld $vr20, $a0, CONTEXT_FPU_OFFSET + 16*20
vld $vr21, $a0, CONTEXT_FPU_OFFSET + 16*21
vld $vr22, $a0, CONTEXT_FPU_OFFSET + 16*22
vld $vr23, $a0, CONTEXT_FPU_OFFSET + 16*23
vld $vr24, $a0, CONTEXT_FPU_OFFSET + 16*24
vld $vr25, $a0, CONTEXT_FPU_OFFSET + 16*25
vld $vr26, $a0, CONTEXT_FPU_OFFSET + 16*26
vld $vr27, $a0, CONTEXT_FPU_OFFSET + 16*27
vld $vr28, $a0, CONTEXT_FPU_OFFSET + 16*28
vld $vr29, $a0, CONTEXT_FPU_OFFSET + 16*29
vld $vr30, $a0, CONTEXT_FPU_OFFSET + 16*30
vld $vr31, $a0, CONTEXT_FPU_OFFSET + 16*31
b LOCAL_LABEL(Restore_CONTEXT_FLOATING_CONTROL)
LOCAL_LABEL(Restore_CONTEXT_LASX):
// 256-bit SIMD: LASX.
xvld $xr0, $a0, CONTEXT_FPU_OFFSET + 0
xvld $xr1, $a0, CONTEXT_FPU_OFFSET + 32*1
xvld $xr2, $a0, CONTEXT_FPU_OFFSET + 32*2
xvld $xr3, $a0, CONTEXT_FPU_OFFSET + 32*3
xvld $xr4, $a0, CONTEXT_FPU_OFFSET + 32*4
xvld $xr5, $a0, CONTEXT_FPU_OFFSET + 32*5
xvld $xr6, $a0, CONTEXT_FPU_OFFSET + 32*6
xvld $xr7, $a0, CONTEXT_FPU_OFFSET + 32*7
xvld $xr8, $a0, CONTEXT_FPU_OFFSET + 32*8
xvld $xr9, $a0, CONTEXT_FPU_OFFSET + 32*9
xvld $xr0 , $a0, CONTEXT_FPU_OFFSET
xvld $xr1 , $a0, CONTEXT_FPU_OFFSET + 32*1
xvld $xr2 , $a0, CONTEXT_FPU_OFFSET + 32*2
xvld $xr3 , $a0, CONTEXT_FPU_OFFSET + 32*3
xvld $xr4 , $a0, CONTEXT_FPU_OFFSET + 32*4
xvld $xr5 , $a0, CONTEXT_FPU_OFFSET + 32*5
xvld $xr6 , $a0, CONTEXT_FPU_OFFSET + 32*6
xvld $xr7 , $a0, CONTEXT_FPU_OFFSET + 32*7
xvld $xr8 , $a0, CONTEXT_FPU_OFFSET + 32*8
xvld $xr9 , $a0, CONTEXT_FPU_OFFSET + 32*9
xvld $xr10, $a0, CONTEXT_FPU_OFFSET + 32*10
xvld $xr11, $a0, CONTEXT_FPU_OFFSET + 32*11
xvld $xr12, $a0, CONTEXT_FPU_OFFSET + 32*12
@ -71,6 +157,7 @@ LOCAL_LABEL(Restore_CONTEXT_FLOATING_POINT):
xvld $xr30, $a0, CONTEXT_FPU_OFFSET + 32*30
xvld $xr31, $a0, CONTEXT_FPU_OFFSET + 32*31
LOCAL_LABEL(Restore_CONTEXT_FLOATING_CONTROL):
ld.d $t1, $a0, CONTEXT_FLOAT_CONTROL_OFFSET
movgr2cf $fcc0, $t1
srli.d $t1, $t1, 8
@ -127,7 +214,7 @@ LOCAL_LABEL(No_Restore_CONTEXT_FLOATING_POINT):
LOCAL_LABEL(No_Restore_CONTEXT_INTEGER):
andi $r21, $r21, (1 << CONTEXT_CONTROL_BIT)
beq $r21, $r0, LOCAL_LABEL(No_Restore_CONTEXT_CONTROL)
beqz $r21, LOCAL_LABEL(No_Restore_CONTEXT_CONTROL)
ld.d $ra, $t4, CONTEXT_Ra
ld.d $fp, $t4, CONTEXT_Fp
@ -218,8 +305,92 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER):
andi $t3, $t1, (1 << CONTEXT_FLOATING_POINT_BIT)
beqz $t3, LOCAL_LABEL(Done_CONTEXT_FLOATING_POINT)
PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 32
st.d $a0, $fp, 16
bl C_FUNC(minipal_getcpufeatures)
ori $t1, $a0, 0
ld.d $a0, $fp, 16
EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32
andi $t3, $t1, CONTEXT_HWCAP_LOONGARCH_LASX_BIT
bnez $t3, LOCAL_LABEL(Store_CONTEXT_LASX)
andi $t3, $t1, CONTEXT_HWCAP_LOONGARCH_LSX_BIT
bnez $t3, LOCAL_LABEL(Store_CONTEXT_LSX)
// Neither LSX nor LASX is supported.
fst.d $f0 , $a0, CONTEXT_FPU_OFFSET
fst.d $f1 , $a0, CONTEXT_FPU_OFFSET + 8*1
fst.d $f2 , $a0, CONTEXT_FPU_OFFSET + 8*2
fst.d $f3 , $a0, CONTEXT_FPU_OFFSET + 8*3
fst.d $f4 , $a0, CONTEXT_FPU_OFFSET + 8*4
fst.d $f5 , $a0, CONTEXT_FPU_OFFSET + 8*5
fst.d $f6 , $a0, CONTEXT_FPU_OFFSET + 8*6
fst.d $f7 , $a0, CONTEXT_FPU_OFFSET + 8*7
fst.d $f8 , $a0, CONTEXT_FPU_OFFSET + 8*8
fst.d $f9 , $a0, CONTEXT_FPU_OFFSET + 8*9
fst.d $f10, $a0, CONTEXT_FPU_OFFSET + 8*10
fst.d $f11, $a0, CONTEXT_FPU_OFFSET + 8*11
fst.d $f12, $a0, CONTEXT_FPU_OFFSET + 8*12
fst.d $f13, $a0, CONTEXT_FPU_OFFSET + 8*13
fst.d $f14, $a0, CONTEXT_FPU_OFFSET + 8*14
fst.d $f15, $a0, CONTEXT_FPU_OFFSET + 8*15
fst.d $f16, $a0, CONTEXT_FPU_OFFSET + 8*16
fst.d $f17, $a0, CONTEXT_FPU_OFFSET + 8*17
fst.d $f18, $a0, CONTEXT_FPU_OFFSET + 8*18
fst.d $f19, $a0, CONTEXT_FPU_OFFSET + 8*19
fst.d $f20, $a0, CONTEXT_FPU_OFFSET + 8*20
fst.d $f21, $a0, CONTEXT_FPU_OFFSET + 8*21
fst.d $f22, $a0, CONTEXT_FPU_OFFSET + 8*22
fst.d $f23, $a0, CONTEXT_FPU_OFFSET + 8*23
fst.d $f24, $a0, CONTEXT_FPU_OFFSET + 8*24
fst.d $f25, $a0, CONTEXT_FPU_OFFSET + 8*25
fst.d $f26, $a0, CONTEXT_FPU_OFFSET + 8*26
fst.d $f27, $a0, CONTEXT_FPU_OFFSET + 8*27
fst.d $f28, $a0, CONTEXT_FPU_OFFSET + 8*28
fst.d $f29, $a0, CONTEXT_FPU_OFFSET + 8*29
fst.d $f30, $a0, CONTEXT_FPU_OFFSET + 8*30
fst.d $f31, $a0, CONTEXT_FPU_OFFSET + 8*31
b LOCAL_LABEL(Store_CONTEXT_FLOAT_CONTROL)
LOCAL_LABEL(Store_CONTEXT_LSX):
// 128-bit SIMD: LSX.
vst $vr0 , $a0, CONTEXT_FPU_OFFSET
vst $vr1 , $a0, CONTEXT_FPU_OFFSET + 16*1
vst $vr2 , $a0, CONTEXT_FPU_OFFSET + 16*2
vst $vr3 , $a0, CONTEXT_FPU_OFFSET + 16*3
vst $vr4 , $a0, CONTEXT_FPU_OFFSET + 16*4
vst $vr5 , $a0, CONTEXT_FPU_OFFSET + 16*5
vst $vr6 , $a0, CONTEXT_FPU_OFFSET + 16*6
vst $vr7 , $a0, CONTEXT_FPU_OFFSET + 16*7
vst $vr8 , $a0, CONTEXT_FPU_OFFSET + 16*8
vst $vr9 , $a0, CONTEXT_FPU_OFFSET + 16*9
vst $vr10, $a0, CONTEXT_FPU_OFFSET + 16*10
vst $vr11, $a0, CONTEXT_FPU_OFFSET + 16*11
vst $vr12, $a0, CONTEXT_FPU_OFFSET + 16*12
vst $vr13, $a0, CONTEXT_FPU_OFFSET + 16*13
vst $vr14, $a0, CONTEXT_FPU_OFFSET + 16*14
vst $vr15, $a0, CONTEXT_FPU_OFFSET + 16*15
vst $vr16, $a0, CONTEXT_FPU_OFFSET + 16*16
vst $vr17, $a0, CONTEXT_FPU_OFFSET + 16*17
vst $vr18, $a0, CONTEXT_FPU_OFFSET + 16*18
vst $vr19, $a0, CONTEXT_FPU_OFFSET + 16*19
vst $vr20, $a0, CONTEXT_FPU_OFFSET + 16*20
vst $vr21, $a0, CONTEXT_FPU_OFFSET + 16*21
vst $vr22, $a0, CONTEXT_FPU_OFFSET + 16*22
vst $vr23, $a0, CONTEXT_FPU_OFFSET + 16*23
vst $vr24, $a0, CONTEXT_FPU_OFFSET + 16*24
vst $vr25, $a0, CONTEXT_FPU_OFFSET + 16*25
vst $vr26, $a0, CONTEXT_FPU_OFFSET + 16*26
vst $vr27, $a0, CONTEXT_FPU_OFFSET + 16*27
vst $vr28, $a0, CONTEXT_FPU_OFFSET + 16*28
vst $vr29, $a0, CONTEXT_FPU_OFFSET + 16*29
vst $vr30, $a0, CONTEXT_FPU_OFFSET + 16*30
vst $vr31, $a0, CONTEXT_FPU_OFFSET + 16*31
b LOCAL_LABEL(Store_CONTEXT_FLOAT_CONTROL)
LOCAL_LABEL(Store_CONTEXT_LASX):
// 256-bit SIMD: LASX.
xvst $xr0 , $a0, CONTEXT_FPU_OFFSET + 32*0
xvst $xr0 , $a0, CONTEXT_FPU_OFFSET
xvst $xr1 , $a0, CONTEXT_FPU_OFFSET + 32*1
xvst $xr2 , $a0, CONTEXT_FPU_OFFSET + 32*2
xvst $xr3 , $a0, CONTEXT_FPU_OFFSET + 32*3
@ -252,6 +423,7 @@ LOCAL_LABEL(Done_CONTEXT_INTEGER):
xvst $xr30, $a0, CONTEXT_FPU_OFFSET + 32*30
xvst $xr31, $a0, CONTEXT_FPU_OFFSET + 32*31
LOCAL_LABEL(Store_CONTEXT_FLOAT_CONTROL):
ori $t0, $r0, 0
movcf2gr $t0, $fcc0
st.b $t0, $a0, CONTEXT_FLOAT_CONTROL_OFFSET

View File

@ -364,7 +364,6 @@ NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler
// $fp,$ra
PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0xa0
//PROLOG_SAVE_REG gp, 16
SAVE_ARGUMENT_REGISTERS $sp, 0x20
SAVE_FLOAT_ARGUMENT_REGISTERS $sp, 0x60
@ -375,7 +374,6 @@ NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler
// pop the stack and restore original register state
RESTORE_FLOAT_ARGUMENT_REGISTERS $sp, 0x60
RESTORE_ARGUMENT_REGISTERS $sp, 0x20
//EPILOG_RESTORE_REG gp, 16
// $fp,$ra
EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0xa0
@ -406,7 +404,6 @@ NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler
// Save arguments and return address
// $fp,$ra
PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0xa0
//PROLOG_SAVE_REG gp, 16
SAVE_ARGUMENT_REGISTERS $sp, 32
SAVE_FLOAT_ARGUMENT_REGISTERS $sp, 96
@ -425,7 +422,6 @@ NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler
// pop the stack and restore original register state
RESTORE_FLOAT_ARGUMENT_REGISTERS $sp, 96
RESTORE_ARGUMENT_REGISTERS $sp, 32
//EPILOG_RESTORE_REG gp, 16
// $fp,$ra
EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0xa0
@ -476,7 +472,6 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix
// Save arguments and return address
// $fp,$ra
PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 0xa0
//PROLOG_SAVE_REG gp, 16
SAVE_ARGUMENT_REGISTERS $sp, 32
SAVE_FLOAT_ARGUMENT_REGISTERS $sp, 96
@ -488,7 +483,6 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix
// pop the stack and restore original register state
RESTORE_FLOAT_ARGUMENT_REGISTERS $sp, 96
RESTORE_ARGUMENT_REGISTERS $sp, 32
//EPILOG_RESTORE_REG gp, 16
// $fp,$ra
EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0xa0
@ -604,34 +598,6 @@ LOCAL_LABEL(Fail):
b C_FUNC(ResolveWorkerAsmStub) // call the ResolveWorkerAsmStub method to transition into the VM
NESTED_END ResolveWorkerChainLookupAsmStub, _TEXT
//NOTE: Frame_Size = SIZEOF__ArgumentRegisters + SIZEOF__FloatArgumentRegisters + extra.
//
// |gp |
// |s0 |
// |$t2 |
// |t9 |
// |$a7 |
// |$a6 |
// |$a5 |
// |$a4 |
// |$a3 |
// |$a2 |
// |$a1 |
// |$a0 |
// |$ra | $sp+8
// |fp | $sp
//
// |f19 | if needed.
// |f18 |
// |f17 |
// |f16 |
// |f15 |
// |f14 |
// |f13 |
// |f12 |
//
// ------------------------------------------------------------------
// void ResolveWorkerAsmStub(args in regs $a0-$a7 & stack, t8:IndirectionCellAndFlags, $t2:DispatchToken)
//

View File

@ -32,7 +32,6 @@ EXTERN_C VOID STDCALL PrecodeRemotingThunk();
#elif defined(TARGET_LOONGARCH64)
#define SIZEOF_PRECODE_BASE CODE_SIZE_ALIGN
#define SHIFTOF_PRECODE_TYPE 5
#elif defined(TARGET_RISCV64)

View File

@ -614,6 +614,26 @@ int minipal_getcpufeatures(void)
#endif // HOST_RISCV64
#if defined(HOST_LOONGARCH64)
#if defined(HOST_UNIX)
#if HAVE_AUXV_HWCAP_H
unsigned long hwCap = getauxval(AT_HWCAP);
if (hwCap & HWCAP_LOONGARCH_LSX)
result |= LoongArch64IntrinsicConstants_LSX;
if (hwCap & HWCAP_LOONGARCH_LASX)
result |= LoongArch64IntrinsicConstants_LASX;
#endif // HAVE_AUXV_HWCAP_H
#endif // HOST_UNIX
#endif // HOST_LOONGARCH64
return result;
}

View File

@ -58,6 +58,11 @@ static_assert((1 << ARM64_ATOMICS_FEATURE_FLAG_BIT) == ARM64IntrinsicConstants_A
#define RiscV64IntrinsicConstants_Zbb (1 << 1)
#endif // HOST_RISCV64
#if defined(HOST_LOONGARCH64)
// SIMD extension flags reported by minipal_getcpufeatures() on LoongArch64.
// NOTE(review): the context save/restore assembly tests the same bit positions
// through its own CONTEXT_HWCAP_LOONGARCH_{LSX,LASX}_BIT masks (1 << 4, 1 << 5);
// keep the two sets of definitions in sync.
#define LoongArch64IntrinsicConstants_LSX (1 << 4)
#define LoongArch64IntrinsicConstants_LASX (1 << 5)
#endif // HOST_LOONGARCH64
#ifdef __cplusplus
extern "C"
{