forked from OSchip/llvm-project
R600: Use correct encoding for Vertex Fetch instructions on Cayman
Reviewed-by: Vincent Lejeune <vljn at ovi.com>
llvm-svn: 184016
This commit is contained in:
parent
6aa0d5578d
commit
ecf9d86404
|
|
@ -99,7 +99,9 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||||
} else if (IS_VTX(Desc)) {
|
} else if (IS_VTX(Desc)) {
|
||||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
|
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
|
||||||
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
|
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
|
||||||
InstWord2 |= 1 << 19;
|
if (!(STI.getFeatureBits() & AMDGPU::FeatureCaymanISA)) {
|
||||||
|
InstWord2 |= 1 << 19; // Mega-Fetch bit
|
||||||
|
}
|
||||||
|
|
||||||
Emit(InstWord01, OS);
|
Emit(InstWord01, OS);
|
||||||
Emit(InstWord2, OS);
|
Emit(InstWord2, OS);
|
||||||
|
|
|
||||||
|
|
@ -166,28 +166,46 @@ class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 {
|
||||||
|
|
||||||
class VTX_WORD0 {
|
class VTX_WORD0 {
|
||||||
field bits<32> Word0;
|
field bits<32> Word0;
|
||||||
bits<7> SRC_GPR;
|
bits<7> src_gpr;
|
||||||
bits<5> VC_INST;
|
bits<5> VC_INST;
|
||||||
bits<2> FETCH_TYPE;
|
bits<2> FETCH_TYPE;
|
||||||
bits<1> FETCH_WHOLE_QUAD;
|
bits<1> FETCH_WHOLE_QUAD;
|
||||||
bits<8> BUFFER_ID;
|
bits<8> BUFFER_ID;
|
||||||
bits<1> SRC_REL;
|
bits<1> SRC_REL;
|
||||||
bits<2> SRC_SEL_X;
|
bits<2> SRC_SEL_X;
|
||||||
bits<6> MEGA_FETCH_COUNT;
|
|
||||||
|
|
||||||
let Word0{4-0} = VC_INST;
|
let Word0{4-0} = VC_INST;
|
||||||
let Word0{6-5} = FETCH_TYPE;
|
let Word0{6-5} = FETCH_TYPE;
|
||||||
let Word0{7} = FETCH_WHOLE_QUAD;
|
let Word0{7} = FETCH_WHOLE_QUAD;
|
||||||
let Word0{15-8} = BUFFER_ID;
|
let Word0{15-8} = BUFFER_ID;
|
||||||
let Word0{22-16} = SRC_GPR;
|
let Word0{22-16} = src_gpr;
|
||||||
let Word0{23} = SRC_REL;
|
let Word0{23} = SRC_REL;
|
||||||
let Word0{25-24} = SRC_SEL_X;
|
let Word0{25-24} = SRC_SEL_X;
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_WORD0_eg : VTX_WORD0 {
|
||||||
|
|
||||||
|
bits<6> MEGA_FETCH_COUNT;
|
||||||
|
|
||||||
let Word0{31-26} = MEGA_FETCH_COUNT;
|
let Word0{31-26} = MEGA_FETCH_COUNT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class VTX_WORD0_cm : VTX_WORD0 {
|
||||||
|
|
||||||
|
bits<2> SRC_SEL_Y;
|
||||||
|
bits<2> STRUCTURED_READ;
|
||||||
|
bits<1> LDS_REQ;
|
||||||
|
bits<1> COALESCED_READ;
|
||||||
|
|
||||||
|
let Word0{27-26} = SRC_SEL_Y;
|
||||||
|
let Word0{29-28} = STRUCTURED_READ;
|
||||||
|
let Word0{30} = LDS_REQ;
|
||||||
|
let Word0{31} = COALESCED_READ;
|
||||||
|
}
|
||||||
|
|
||||||
class VTX_WORD1_GPR {
|
class VTX_WORD1_GPR {
|
||||||
field bits<32> Word1;
|
field bits<32> Word1;
|
||||||
bits<7> DST_GPR;
|
bits<7> dst_gpr;
|
||||||
bits<1> DST_REL;
|
bits<1> DST_REL;
|
||||||
bits<3> DST_SEL_X;
|
bits<3> DST_SEL_X;
|
||||||
bits<3> DST_SEL_Y;
|
bits<3> DST_SEL_Y;
|
||||||
|
|
@ -199,7 +217,7 @@ class VTX_WORD1_GPR {
|
||||||
bits<1> FORMAT_COMP_ALL;
|
bits<1> FORMAT_COMP_ALL;
|
||||||
bits<1> SRF_MODE_ALL;
|
bits<1> SRF_MODE_ALL;
|
||||||
|
|
||||||
let Word1{6-0} = DST_GPR;
|
let Word1{6-0} = dst_gpr;
|
||||||
let Word1{7} = DST_REL;
|
let Word1{7} = DST_REL;
|
||||||
let Word1{8} = 0; // Reserved
|
let Word1{8} = 0; // Reserved
|
||||||
let Word1{11-9} = DST_SEL_X;
|
let Word1{11-9} = DST_SEL_X;
|
||||||
|
|
|
||||||
|
|
@ -261,6 +261,50 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> mask, dag outs,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
|
||||||
|
: InstR600ISA <outs, (ins MEMxi:$src_gpr), name, pattern>,
|
||||||
|
VTX_WORD1_GPR {
|
||||||
|
|
||||||
|
// Static fields
|
||||||
|
let DST_REL = 0;
|
||||||
|
// The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
|
||||||
|
// FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
|
||||||
|
// however, based on my testing if USE_CONST_FIELDS is set, then all
|
||||||
|
// these fields need to be set to 0.
|
||||||
|
let USE_CONST_FIELDS = 0;
|
||||||
|
let NUM_FORMAT_ALL = 1;
|
||||||
|
let FORMAT_COMP_ALL = 0;
|
||||||
|
let SRF_MODE_ALL = 0;
|
||||||
|
|
||||||
|
let Inst{63-32} = Word1;
|
||||||
|
// LLVM can only encode 64-bit instructions, so these fields are manually
|
||||||
|
// encoded in R600CodeEmitter
|
||||||
|
//
|
||||||
|
// bits<16> OFFSET;
|
||||||
|
// bits<2> ENDIAN_SWAP = 0;
|
||||||
|
// bits<1> CONST_BUF_NO_STRIDE = 0;
|
||||||
|
// bits<1> MEGA_FETCH = 0;
|
||||||
|
// bits<1> ALT_CONST = 0;
|
||||||
|
// bits<2> BUFFER_INDEX_MODE = 0;
|
||||||
|
|
||||||
|
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
|
||||||
|
// is done in R600CodeEmitter)
|
||||||
|
//
|
||||||
|
// Inst{79-64} = OFFSET;
|
||||||
|
// Inst{81-80} = ENDIAN_SWAP;
|
||||||
|
// Inst{82} = CONST_BUF_NO_STRIDE;
|
||||||
|
// Inst{83} = MEGA_FETCH;
|
||||||
|
// Inst{84} = ALT_CONST;
|
||||||
|
// Inst{86-85} = BUFFER_INDEX_MODE;
|
||||||
|
// Inst{95-87} = 0; Reserved
|
||||||
|
|
||||||
|
// VTX_WORD3 (Padding)
|
||||||
|
//
|
||||||
|
// Inst{127-96} = 0;
|
||||||
|
|
||||||
|
let VTXInst = 1;
|
||||||
|
}
|
||||||
|
|
||||||
class LoadParamFrag <PatFrag load_type> : PatFrag <
|
class LoadParamFrag <PatFrag load_type> : PatFrag <
|
||||||
(ops node:$ptr), (load_type node:$ptr),
|
(ops node:$ptr), (load_type node:$ptr),
|
||||||
[{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
|
[{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
|
||||||
|
|
@ -1249,6 +1293,133 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg <
|
||||||
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
|
[(global_store v4i32:$rw_gpr, i32:$index_gpr)]
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
|
||||||
|
: VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
|
||||||
|
|
||||||
|
// Static fields
|
||||||
|
let VC_INST = 0;
|
||||||
|
let FETCH_TYPE = 2;
|
||||||
|
let FETCH_WHOLE_QUAD = 0;
|
||||||
|
let BUFFER_ID = buffer_id;
|
||||||
|
let SRC_REL = 0;
|
||||||
|
// XXX: We can infer this field based on the SRC_GPR. This would allow us
|
||||||
|
// to store vertex addresses in any channel, not just X.
|
||||||
|
let SRC_SEL_X = 0;
|
||||||
|
|
||||||
|
let Inst{31-0} = Word0;
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
|
||||||
|
(outs R600_TReg32_X:$dst_gpr), pattern> {
|
||||||
|
|
||||||
|
let MEGA_FETCH_COUNT = 1;
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 7; // Masked
|
||||||
|
let DST_SEL_Z = 7; // Masked
|
||||||
|
let DST_SEL_W = 7; // Masked
|
||||||
|
let DATA_FORMAT = 1; // FMT_8
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
|
||||||
|
(outs R600_TReg32_X:$dst_gpr), pattern> {
|
||||||
|
let MEGA_FETCH_COUNT = 2;
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 7; // Masked
|
||||||
|
let DST_SEL_Z = 7; // Masked
|
||||||
|
let DST_SEL_W = 7; // Masked
|
||||||
|
let DATA_FORMAT = 5; // FMT_16
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
|
||||||
|
(outs R600_TReg32_X:$dst_gpr), pattern> {
|
||||||
|
|
||||||
|
let MEGA_FETCH_COUNT = 4;
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 7; // Masked
|
||||||
|
let DST_SEL_Z = 7; // Masked
|
||||||
|
let DST_SEL_W = 7; // Masked
|
||||||
|
let DATA_FORMAT = 0xD; // COLOR_32
|
||||||
|
|
||||||
|
// This is not really necessary, but there were some GPU hangs that appeared
|
||||||
|
// to be caused by ALU instructions in the next instruction group that wrote
|
||||||
|
// to the $src_gpr registers of the VTX_READ.
|
||||||
|
// e.g.
|
||||||
|
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
|
||||||
|
// %T2_X<def> = MOV %ZERO
|
||||||
|
// Adding this constraint prevents this from happening.
|
||||||
|
let Constraints = "$src_gpr.ptr = $dst_gpr";
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
|
||||||
|
(outs R600_Reg128:$dst_gpr), pattern> {
|
||||||
|
|
||||||
|
let MEGA_FETCH_COUNT = 16;
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 1;
|
||||||
|
let DST_SEL_Z = 2;
|
||||||
|
let DST_SEL_W = 3;
|
||||||
|
let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
|
||||||
|
|
||||||
|
// XXX: Need to force VTX_READ_128 instructions to write to the same register
|
||||||
|
// that holds its buffer address to avoid potential hangs. We can't use
|
||||||
|
// the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst_gpr
|
||||||
|
// registers are different sizes.
|
||||||
|
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// VTX Read from parameter memory space
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
|
||||||
|
[(set i32:$dst_gpr, (load_param_zexti8 ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
|
||||||
|
[(set i32:$dst_gpr, (load_param_zexti16 ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
|
||||||
|
[(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
|
||||||
|
[(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// VTX Read from global memory space
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// 8-bit reads
|
||||||
|
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
|
||||||
|
[(set i32:$dst_gpr, (zextloadi8_global ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
// 32-bit reads
|
||||||
|
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
|
||||||
|
[(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
// 128-bit reads
|
||||||
|
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
|
||||||
|
[(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// Constant Loads
|
||||||
|
// XXX: We are currently storing all constants in the global address space.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
|
||||||
|
[(set i32:$dst_gpr, (constant_load ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
|
||||||
} // End Predicates = [isEG]
|
} // End Predicates = [isEG]
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
@ -1403,176 +1574,7 @@ let hasSideEffects = 1 in {
|
||||||
let END_OF_PROGRAM = 1;
|
let END_OF_PROGRAM = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
} // End Predicates = [isEGorCayman]
|
||||||
// Memory read/write instructions
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
|
|
||||||
: InstR600ISA <outs, (ins MEMxi:$ptr), name, pattern>,
|
|
||||||
VTX_WORD1_GPR, VTX_WORD0 {
|
|
||||||
|
|
||||||
// Static fields
|
|
||||||
let VC_INST = 0;
|
|
||||||
let FETCH_TYPE = 2;
|
|
||||||
let FETCH_WHOLE_QUAD = 0;
|
|
||||||
let BUFFER_ID = buffer_id;
|
|
||||||
let SRC_REL = 0;
|
|
||||||
// XXX: We can infer this field based on the SRC_GPR. This would allow us
|
|
||||||
// to store vertex addresses in any channel, not just X.
|
|
||||||
let SRC_SEL_X = 0;
|
|
||||||
let DST_REL = 0;
|
|
||||||
// The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL,
|
|
||||||
// FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored,
|
|
||||||
// however, based on my testing if USE_CONST_FIELDS is set, then all
|
|
||||||
// these fields need to be set to 0.
|
|
||||||
let USE_CONST_FIELDS = 0;
|
|
||||||
let NUM_FORMAT_ALL = 1;
|
|
||||||
let FORMAT_COMP_ALL = 0;
|
|
||||||
let SRF_MODE_ALL = 0;
|
|
||||||
|
|
||||||
let Inst{31-0} = Word0;
|
|
||||||
let Inst{63-32} = Word1;
|
|
||||||
// LLVM can only encode 64-bit instructions, so these fields are manually
|
|
||||||
// encoded in R600CodeEmitter
|
|
||||||
//
|
|
||||||
// bits<16> OFFSET;
|
|
||||||
// bits<2> ENDIAN_SWAP = 0;
|
|
||||||
// bits<1> CONST_BUF_NO_STRIDE = 0;
|
|
||||||
// bits<1> MEGA_FETCH = 0;
|
|
||||||
// bits<1> ALT_CONST = 0;
|
|
||||||
// bits<2> BUFFER_INDEX_MODE = 0;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding
|
|
||||||
// is done in R600CodeEmitter
|
|
||||||
//
|
|
||||||
// Inst{79-64} = OFFSET;
|
|
||||||
// Inst{81-80} = ENDIAN_SWAP;
|
|
||||||
// Inst{82} = CONST_BUF_NO_STRIDE;
|
|
||||||
// Inst{83} = MEGA_FETCH;
|
|
||||||
// Inst{84} = ALT_CONST;
|
|
||||||
// Inst{86-85} = BUFFER_INDEX_MODE;
|
|
||||||
// Inst{95-86} = 0; Reserved
|
|
||||||
|
|
||||||
// VTX_WORD3 (Padding)
|
|
||||||
//
|
|
||||||
// Inst{127-96} = 0;
|
|
||||||
|
|
||||||
let VTXInst = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
|
|
||||||
: VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
|
|
||||||
pattern> {
|
|
||||||
|
|
||||||
let MEGA_FETCH_COUNT = 1;
|
|
||||||
let DST_SEL_X = 0;
|
|
||||||
let DST_SEL_Y = 7; // Masked
|
|
||||||
let DST_SEL_Z = 7; // Masked
|
|
||||||
let DST_SEL_W = 7; // Masked
|
|
||||||
let DATA_FORMAT = 1; // FMT_8
|
|
||||||
}
|
|
||||||
|
|
||||||
class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
|
|
||||||
: VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
|
|
||||||
pattern> {
|
|
||||||
let MEGA_FETCH_COUNT = 2;
|
|
||||||
let DST_SEL_X = 0;
|
|
||||||
let DST_SEL_Y = 7; // Masked
|
|
||||||
let DST_SEL_Z = 7; // Masked
|
|
||||||
let DST_SEL_W = 7; // Masked
|
|
||||||
let DATA_FORMAT = 5; // FMT_16
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
|
|
||||||
: VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst),
|
|
||||||
pattern> {
|
|
||||||
|
|
||||||
let MEGA_FETCH_COUNT = 4;
|
|
||||||
let DST_SEL_X = 0;
|
|
||||||
let DST_SEL_Y = 7; // Masked
|
|
||||||
let DST_SEL_Z = 7; // Masked
|
|
||||||
let DST_SEL_W = 7; // Masked
|
|
||||||
let DATA_FORMAT = 0xD; // COLOR_32
|
|
||||||
|
|
||||||
// This is not really necessary, but there were some GPU hangs that appeared
|
|
||||||
// to be caused by ALU instructions in the next instruction group that wrote
|
|
||||||
// to the $ptr registers of the VTX_READ.
|
|
||||||
// e.g.
|
|
||||||
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
|
|
||||||
// %T2_X<def> = MOV %ZERO
|
|
||||||
//Adding this constraint prevents this from happening.
|
|
||||||
let Constraints = "$ptr.ptr = $dst";
|
|
||||||
}
|
|
||||||
|
|
||||||
class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
|
|
||||||
: VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst),
|
|
||||||
pattern> {
|
|
||||||
|
|
||||||
let MEGA_FETCH_COUNT = 16;
|
|
||||||
let DST_SEL_X = 0;
|
|
||||||
let DST_SEL_Y = 1;
|
|
||||||
let DST_SEL_Z = 2;
|
|
||||||
let DST_SEL_W = 3;
|
|
||||||
let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
|
|
||||||
|
|
||||||
// XXX: Need to force VTX_READ_128 instructions to write to the same register
|
|
||||||
// that holds its buffer address to avoid potential hangs. We can't use
|
|
||||||
// the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst
|
|
||||||
// registers are different sizes.
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// VTX Read from parameter memory space
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0,
|
|
||||||
[(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0,
|
|
||||||
[(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0,
|
|
||||||
[(set i32:$dst, (load_param ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0,
|
|
||||||
[(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// VTX Read from global memory space
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
// 8-bit reads
|
|
||||||
def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1,
|
|
||||||
[(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// 32-bit reads
|
|
||||||
def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1,
|
|
||||||
[(set i32:$dst, (global_load ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// 128-bit reads
|
|
||||||
def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
|
|
||||||
[(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Constant Loads
|
|
||||||
// XXX: We are currently storing all constants in the global address space.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
|
|
||||||
[(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Register loads and stores - for indirect addressing
|
// Register loads and stores - for indirect addressing
|
||||||
|
|
@ -1635,6 +1637,122 @@ def RAT_STORE_DWORD_cm : EG_CF_RAT <
|
||||||
let eop = 0; // This bit is not used on Cayman.
|
let eop = 0; // This bit is not used on Cayman.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
|
||||||
|
: VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
|
||||||
|
|
||||||
|
// Static fields
|
||||||
|
let VC_INST = 0;
|
||||||
|
let FETCH_TYPE = 2;
|
||||||
|
let FETCH_WHOLE_QUAD = 0;
|
||||||
|
let BUFFER_ID = buffer_id;
|
||||||
|
let SRC_REL = 0;
|
||||||
|
// XXX: We can infer this field based on the SRC_GPR. This would allow us
|
||||||
|
// to store vertex addresses in any channel, not just X.
|
||||||
|
let SRC_SEL_X = 0;
|
||||||
|
let SRC_SEL_Y = 0;
|
||||||
|
let STRUCTURED_READ = 0;
|
||||||
|
let LDS_REQ = 0;
|
||||||
|
let COALESCED_READ = 0;
|
||||||
|
|
||||||
|
let Inst{31-0} = Word0;
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
|
||||||
|
(outs R600_TReg32_X:$dst_gpr), pattern> {
|
||||||
|
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 7; // Masked
|
||||||
|
let DST_SEL_Z = 7; // Masked
|
||||||
|
let DST_SEL_W = 7; // Masked
|
||||||
|
let DATA_FORMAT = 1; // FMT_8
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
|
||||||
|
(outs R600_TReg32_X:$dst_gpr), pattern> {
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 7; // Masked
|
||||||
|
let DST_SEL_Z = 7; // Masked
|
||||||
|
let DST_SEL_W = 7; // Masked
|
||||||
|
let DATA_FORMAT = 5; // FMT_16
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
|
||||||
|
(outs R600_TReg32_X:$dst_gpr), pattern> {
|
||||||
|
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 7; // Masked
|
||||||
|
let DST_SEL_Z = 7; // Masked
|
||||||
|
let DST_SEL_W = 7; // Masked
|
||||||
|
let DATA_FORMAT = 0xD; // COLOR_32
|
||||||
|
|
||||||
|
// This is not really necessary, but there were some GPU hangs that appeared
|
||||||
|
// to be caused by ALU instructions in the next instruction group that wrote
|
||||||
|
// to the $src_gpr registers of the VTX_READ.
|
||||||
|
// e.g.
|
||||||
|
// %T3_X<def> = VTX_READ_PARAM_32_eg %T2_X<kill>, 24
|
||||||
|
// %T2_X<def> = MOV %ZERO
|
||||||
|
// Adding this constraint prevents this from happening.
|
||||||
|
let Constraints = "$src_gpr.ptr = $dst_gpr";
|
||||||
|
}
|
||||||
|
|
||||||
|
class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
|
||||||
|
: VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
|
||||||
|
(outs R600_Reg128:$dst_gpr), pattern> {
|
||||||
|
|
||||||
|
let DST_SEL_X = 0;
|
||||||
|
let DST_SEL_Y = 1;
|
||||||
|
let DST_SEL_Z = 2;
|
||||||
|
let DST_SEL_W = 3;
|
||||||
|
let DATA_FORMAT = 0x22; // COLOR_32_32_32_32
|
||||||
|
|
||||||
|
// XXX: Need to force VTX_READ_128 instructions to write to the same register
|
||||||
|
// that holds its buffer address to avoid potential hangs. We can't use
|
||||||
|
// the same constraint as VTX_READ_32_cm, because the $src_gpr.ptr and $dst_gpr
|
||||||
|
// registers are different sizes.
|
||||||
|
}
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// VTX Read from parameter memory space
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
|
||||||
|
[(set i32:$dst_gpr, (load_param_zexti8 ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
|
||||||
|
[(set i32:$dst_gpr, (load_param_zexti16 ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
|
||||||
|
[(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
|
||||||
|
[(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// VTX Read from global memory space
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// 8-bit reads
|
||||||
|
def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1,
|
||||||
|
[(set i32:$dst_gpr, (zextloadi8_global ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
// 32-bit reads
|
||||||
|
def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1,
|
||||||
|
[(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
|
// 128-bit reads
|
||||||
|
def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
|
||||||
|
[(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
|
||||||
|
>;
|
||||||
|
|
||||||
} // End isCayman
|
} // End isCayman
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
@ -1755,7 +1873,7 @@ def CONST_COPY : Instruction {
|
||||||
def TEX_VTX_CONSTBUF :
|
def TEX_VTX_CONSTBUF :
|
||||||
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
|
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
|
||||||
[(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
|
[(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
|
||||||
VTX_WORD1_GPR, VTX_WORD0 {
|
VTX_WORD1_GPR, VTX_WORD0_eg {
|
||||||
|
|
||||||
let VC_INST = 0;
|
let VC_INST = 0;
|
||||||
let FETCH_TYPE = 2;
|
let FETCH_TYPE = 2;
|
||||||
|
|
@ -1809,7 +1927,7 @@ def TEX_VTX_CONSTBUF :
|
||||||
def TEX_VTX_TEXBUF:
|
def TEX_VTX_TEXBUF:
|
||||||
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
|
InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr",
|
||||||
[(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
|
[(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>,
|
||||||
VTX_WORD1_GPR, VTX_WORD0 {
|
VTX_WORD1_GPR, VTX_WORD0_eg {
|
||||||
|
|
||||||
let VC_INST = 0;
|
let VC_INST = 0;
|
||||||
let FETCH_TYPE = 2;
|
let FETCH_TYPE = 2;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s
|
||||||
|
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
|
||||||
|
|
||||||
|
; NI-CHECK: @vtx_fetch32
|
||||||
|
; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
|
||||||
|
; CM-CHECK: @vtx_fetch32
|
||||||
|
; CM-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
|
||||||
|
|
||||||
|
define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load i32 addrspace(1)* %in
|
||||||
|
store i32 %0, i32 addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; NI-CHECK: @vtx_fetch128
|
||||||
|
; NI-CHECK: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
|
||||||
|
; XXX: Add a case for Cayman when v4i32 stores are supported.
|
||||||
|
|
||||||
|
define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
|
||||||
|
entry:
|
||||||
|
%0 = load <4 x i32> addrspace(1)* %in
|
||||||
|
store <4 x i32> %0, <4 x i32> addrspace(1)* %out
|
||||||
|
ret void
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue