From ecf9d86404c1f16662cdfb4a3db6c694080b54ed Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 14 Jun 2013 22:12:30 +0000 Subject: [PATCH] R600: Use correct encoding for Vertex Fetch instructions on Cayman Reviewed-by: Vincent Lejeune llvm-svn: 184016 --- .../R600/MCTargetDesc/R600MCCodeEmitter.cpp | 4 +- llvm/lib/Target/R600/R600InstrFormats.td | 28 +- llvm/lib/Target/R600/R600Instructions.td | 462 +++++++++++------- .../CodeGen/R600/vertex-fetch-encoding.ll | 25 + 4 files changed, 341 insertions(+), 178 deletions(-) create mode 100644 llvm/test/CodeGen/R600/vertex-fetch-encoding.ll diff --git a/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 4d6c25c190bd..f470783fe2a1 100644 --- a/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/llvm/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -99,7 +99,9 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, } else if (IS_VTX(Desc)) { uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups); uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset - InstWord2 |= 1 << 19; + if (!(STI.getFeatureBits() & AMDGPU::FeatureCaymanISA)) { + InstWord2 |= 1 << 19; // Mega-Fetch bit + } Emit(InstWord01, OS); Emit(InstWord2, OS); diff --git a/llvm/lib/Target/R600/R600InstrFormats.td b/llvm/lib/Target/R600/R600InstrFormats.td index 618004fd2691..d31f18cb47b6 100644 --- a/llvm/lib/Target/R600/R600InstrFormats.td +++ b/llvm/lib/Target/R600/R600InstrFormats.td @@ -166,28 +166,46 @@ class R600ALU_Word1_OP2_r700 : R600ALU_Word1_OP2 { class VTX_WORD0 { field bits<32> Word0; - bits<7> SRC_GPR; + bits<7> src_gpr; bits<5> VC_INST; bits<2> FETCH_TYPE; bits<1> FETCH_WHOLE_QUAD; bits<8> BUFFER_ID; bits<1> SRC_REL; bits<2> SRC_SEL_X; - bits<6> MEGA_FETCH_COUNT; let Word0{4-0} = VC_INST; let Word0{6-5} = FETCH_TYPE; let Word0{7} = FETCH_WHOLE_QUAD; let Word0{15-8} = BUFFER_ID; - let Word0{22-16} = SRC_GPR; + let Word0{22-16} = src_gpr; let Word0{23} = SRC_REL; let Word0{25-24} = SRC_SEL_X; +} + +class VTX_WORD0_eg : VTX_WORD0 { + + bits<6> MEGA_FETCH_COUNT; + let Word0{31-26} = MEGA_FETCH_COUNT; } +class VTX_WORD0_cm : VTX_WORD0 { + + bits<2> SRC_SEL_Y; + bits<2> STRUCTURED_READ; + bits<1> LDS_REQ; + bits<1> COALESCED_READ; + + let Word0{27-26} = SRC_SEL_Y; + let Word0{29-28} = STRUCTURED_READ; + let Word0{30} = LDS_REQ; + let Word0{31} = COALESCED_READ; +} + class VTX_WORD1_GPR { field bits<32> Word1; - bits<7> DST_GPR; + bits<7> dst_gpr; bits<1> DST_REL; bits<3> DST_SEL_X; bits<3> DST_SEL_Y; @@ -199,7 +217,7 @@ class VTX_WORD1_GPR { bits<1> FORMAT_COMP_ALL; bits<1> SRF_MODE_ALL; - let Word1{6-0} = DST_GPR; + let Word1{6-0} = dst_gpr; let Word1{7} = DST_REL; let Word1{8} = 0; // Reserved let Word1{11-9} = DST_SEL_X; diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td index 86ddd00ce580..27b0214b0092 100644 --- a/llvm/lib/Target/R600/R600Instructions.td +++ b/llvm/lib/Target/R600/R600Instructions.td @@ -261,6 +261,50 @@ class EG_CF_RAT cfinst, bits <6> ratinst, bits<4> mask, dag outs, } +class VTX_READ buffer_id, dag outs, list pattern> + : InstR600ISA , + VTX_WORD1_GPR { + + // Static fields + let DST_REL = 0; + // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, + // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, + // however, based on my testing if USE_CONST_FIELDS is set, then all + // these fields need to be set to 0. + let USE_CONST_FIELDS = 0; + let NUM_FORMAT_ALL = 1; + let FORMAT_COMP_ALL = 0; + let SRF_MODE_ALL = 0; + + let Inst{63-32} = Word1; + // LLVM can only encode 64-bit instructions, so these fields are manually + // encoded in R600CodeEmitter + // + // bits<16> OFFSET; + // bits<2> ENDIAN_SWAP = 0; + // bits<1> CONST_BUF_NO_STRIDE = 0; + // bits<1> MEGA_FETCH = 0; + // bits<1> ALT_CONST = 0; + // bits<2> BUFFER_INDEX_MODE = 0; + + // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding + // is done in R600CodeEmitter + // + // Inst{79-64} = OFFSET; + // Inst{81-80} = ENDIAN_SWAP; + // Inst{82} = CONST_BUF_NO_STRIDE; + // Inst{83} = MEGA_FETCH; + // Inst{84} = ALT_CONST; + // Inst{86-85} = BUFFER_INDEX_MODE; + // Inst{95-86} = 0; Reserved + + // VTX_WORD3 (Padding) + // + // Inst{127-96} = 0; + + let VTXInst = 1; +} + class LoadParamFrag : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isParamLoad(dyn_cast(N)); }] @@ -1249,6 +1293,133 @@ def RAT_WRITE_CACHELESS_128_eg : RAT_WRITE_CACHELESS_eg < [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; +class VTX_READ_eg buffer_id, dag outs, list pattern> + : VTX_WORD0_eg, VTX_READ { + + // Static fields + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; + // XXX: We can infer this field based on the SRC_GPR. This would allow us + // to store vertex addresses in any channel, not just X. + let SRC_SEL_X = 0; + + let Inst{31-0} = Word0; +} + +class VTX_READ_8_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 1; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 1; // FMT_8 +} + +class VTX_READ_16_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + let MEGA_FETCH_COUNT = 2; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 5; // FMT_16 + +} + +class VTX_READ_32_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 4; + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 0xD; // COLOR_32 + + // This is not really necessary, but there were some GPU hangs that appeared + // to be caused by ALU instructions in the next instruction group that wrote + // to the $src_gpr registers of the VTX_READ. + // e.g. + // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 + // %T2_X = MOV %ZERO + //Adding this constraint prevents this from happening. + let Constraints = "$src_gpr.ptr = $dst_gpr"; +} + +class VTX_READ_128_eg buffer_id, list pattern> + : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, + (outs R600_Reg128:$dst_gpr), pattern> { + + let MEGA_FETCH_COUNT = 16; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 + + // XXX: Need to force VTX_READ_128 instructions to write to the same register + // that holds its buffer address to avoid potential hangs. We can't use + // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst + // registers are different sizes. +} + +//===----------------------------------------------------------------------===// +// VTX Read from parameter memory space +//===----------------------------------------------------------------------===// + +def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, + [(set i32:$dst_gpr, (load_param_zexti8 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, + [(set i32:$dst_gpr, (load_param_zexti16 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, + [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, + [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +//===----------------------------------------------------------------------===// +// VTX Read from global memory space +//===----------------------------------------------------------------------===// + +// 8-bit reads +def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, + [(set i32:$dst_gpr, (zextloadi8_global ADDRVTX_READ:$src_gpr))] +>; + +// 32-bit reads +def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, + [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + +// 128-bit reads +def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, + [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + +//===----------------------------------------------------------------------===// +// Constant Loads +// XXX: We are currently storing all constants in the global address space. +//===----------------------------------------------------------------------===// + +def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, + [(set i32:$dst_gpr, (constant_load ADDRVTX_READ:$src_gpr))] +>; + + } // End Predicates = [isEG] //===----------------------------------------------------------------------===// @@ -1403,176 +1574,7 @@ let hasSideEffects = 1 in { let END_OF_PROGRAM = 1; } -//===----------------------------------------------------------------------===// -// Memory read/write instructions -//===----------------------------------------------------------------------===// - -class VTX_READ_eg buffer_id, dag outs, list pattern> - : InstR600ISA , - VTX_WORD1_GPR, VTX_WORD0 { - - // Static fields - let VC_INST = 0; - let FETCH_TYPE = 2; - let FETCH_WHOLE_QUAD = 0; - let BUFFER_ID = buffer_id; - let SRC_REL = 0; - // XXX: We can infer this field based on the SRC_GPR. This would allow us - // to store vertex addresses in any channel, not just X. - let SRC_SEL_X = 0; - let DST_REL = 0; - // The docs say that if this bit is set, then DATA_FORMAT, NUM_FORMAT_ALL, - // FORMAT_COMP_ALL, SRF_MODE_ALL, and ENDIAN_SWAP fields will be ignored, - // however, based on my testing if USE_CONST_FIELDS is set, then all - // these fields need to be set to 0. - let USE_CONST_FIELDS = 0; - let NUM_FORMAT_ALL = 1; - let FORMAT_COMP_ALL = 0; - let SRF_MODE_ALL = 0; - - let Inst{31-0} = Word0; - let Inst{63-32} = Word1; - // LLVM can only encode 64-bit instructions, so these fields are manually - // encoded in R600CodeEmitter - // - // bits<16> OFFSET; - // bits<2> ENDIAN_SWAP = 0; - // bits<1> CONST_BUF_NO_STRIDE = 0; - // bits<1> MEGA_FETCH = 0; - // bits<1> ALT_CONST = 0; - // bits<2> BUFFER_INDEX_MODE = 0; - - - - // VTX_WORD2 (LLVM can only encode 64-bit instructions, so WORD2 encoding - // is done in R600CodeEmitter - // - // Inst{79-64} = OFFSET; - // Inst{81-80} = ENDIAN_SWAP; - // Inst{82} = CONST_BUF_NO_STRIDE; - // Inst{83} = MEGA_FETCH; - // Inst{84} = ALT_CONST; - // Inst{86-85} = BUFFER_INDEX_MODE; - // Inst{95-86} = 0; Reserved - - // VTX_WORD3 (Padding) - // - // Inst{127-96} = 0; - - let VTXInst = 1; -} - -class VTX_READ_8_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_8 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { - - let MEGA_FETCH_COUNT = 1; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 1; // FMT_8 -} - -class VTX_READ_16_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_16 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { - let MEGA_FETCH_COUNT = 2; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 5; // FMT_16 - -} - -class VTX_READ_32_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_32 $dst, $ptr", buffer_id, (outs R600_TReg32_X:$dst), - pattern> { - - let MEGA_FETCH_COUNT = 4; - let DST_SEL_X = 0; - let DST_SEL_Y = 7; // Masked - let DST_SEL_Z = 7; // Masked - let DST_SEL_W = 7; // Masked - let DATA_FORMAT = 0xD; // COLOR_32 - - // This is not really necessary, but there were some GPU hangs that appeared - // to be caused by ALU instructions in the next instruction group that wrote - // to the $ptr registers of the VTX_READ. - // e.g. - // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 - // %T2_X = MOV %ZERO - //Adding this constraint prevents this from happening. - let Constraints = "$ptr.ptr = $dst"; -} - -class VTX_READ_128_eg buffer_id, list pattern> - : VTX_READ_eg <"VTX_READ_128 $dst.XYZW, $ptr", buffer_id, (outs R600_Reg128:$dst), - pattern> { - - let MEGA_FETCH_COUNT = 16; - let DST_SEL_X = 0; - let DST_SEL_Y = 1; - let DST_SEL_Z = 2; - let DST_SEL_W = 3; - let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 - - // XXX: Need to force VTX_READ_128 instructions to write to the same register - // that holds its buffer address to avoid potential hangs. We can't use - // the same constraint as VTX_READ_32_eg, because the $ptr.ptr and $dst - // registers are different sizes. -} - -//===----------------------------------------------------------------------===// -// VTX Read from parameter memory space -//===----------------------------------------------------------------------===// - -def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <0, - [(set i32:$dst, (load_param_zexti8 ADDRVTX_READ:$ptr))] ->; - -def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <0, - [(set i32:$dst, (load_param_zexti16 ADDRVTX_READ:$ptr))] ->; - -def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <0, - [(set i32:$dst, (load_param ADDRVTX_READ:$ptr))] ->; - -def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <0, - [(set v4i32:$dst, (load_param ADDRVTX_READ:$ptr))] ->; - -//===----------------------------------------------------------------------===// -// VTX Read from global memory space -//===----------------------------------------------------------------------===// - -// 8-bit reads -def VTX_READ_GLOBAL_8_eg : VTX_READ_8_eg <1, - [(set i32:$dst, (zextloadi8_global ADDRVTX_READ:$ptr))] ->; - -// 32-bit reads -def VTX_READ_GLOBAL_32_eg : VTX_READ_32_eg <1, - [(set i32:$dst, (global_load ADDRVTX_READ:$ptr))] ->; - -// 128-bit reads -def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1, - [(set v4i32:$dst, (global_load ADDRVTX_READ:$ptr))] ->; - -//===----------------------------------------------------------------------===// -// Constant Loads -// XXX: We are currently storing all constants in the global address space. -//===----------------------------------------------------------------------===// - -def CONSTANT_LOAD_eg : VTX_READ_32_eg <1, - [(set i32:$dst, (constant_load ADDRVTX_READ:$ptr))] ->; - -} +} // End Predicates = [isEGorCayman] //===----------------------------------------------------------------------===// // Regist loads and stores - for indirect addressing @@ -1635,6 +1637,122 @@ def RAT_STORE_DWORD_cm : EG_CF_RAT < let eop = 0; // This bit is not used on Cayman. } +class VTX_READ_cm buffer_id, dag outs, list pattern> + : VTX_WORD0_cm, VTX_READ { + + // Static fields + let VC_INST = 0; + let FETCH_TYPE = 2; + let FETCH_WHOLE_QUAD = 0; + let BUFFER_ID = buffer_id; + let SRC_REL = 0; + // XXX: We can infer this field based on the SRC_GPR. This would allow us + // to store vertex addresses in any channel, not just X. + let SRC_SEL_X = 0; + let SRC_SEL_Y = 0; + let STRUCTURED_READ = 0; + let LDS_REQ = 0; + let COALESCED_READ = 0; + + let Inst{31-0} = Word0; +} + +class VTX_READ_8_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 1; // FMT_8 +} + +class VTX_READ_16_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 5; // FMT_16 + +} + +class VTX_READ_32_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id, + (outs R600_TReg32_X:$dst_gpr), pattern> { + + let DST_SEL_X = 0; + let DST_SEL_Y = 7; // Masked + let DST_SEL_Z = 7; // Masked + let DST_SEL_W = 7; // Masked + let DATA_FORMAT = 0xD; // COLOR_32 + + // This is not really necessary, but there were some GPU hangs that appeared + // to be caused by ALU instructions in the next instruction group that wrote + // to the $src_gpr registers of the VTX_READ. + // e.g. + // %T3_X = VTX_READ_PARAM_32_eg %T2_X, 24 + // %T2_X = MOV %ZERO + //Adding this constraint prevents this from happening. + let Constraints = "$src_gpr.ptr = $dst_gpr"; +} + +class VTX_READ_128_cm buffer_id, list pattern> + : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id, + (outs R600_Reg128:$dst_gpr), pattern> { + + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let DATA_FORMAT = 0x22; // COLOR_32_32_32_32 + + // XXX: Need to force VTX_READ_128 instructions to write to the same register + // that holds its buffer address to avoid potential hangs. We can't use + // the same constraint as VTX_READ_32_eg, because the $src_gpr.ptr and $dst + // registers are different sizes. +} + +//===----------------------------------------------------------------------===// +// VTX Read from parameter memory space +//===----------------------------------------------------------------------===// +def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0, + [(set i32:$dst_gpr, (load_param_zexti8 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0, + [(set i32:$dst_gpr, (load_param_zexti16 ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0, + [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0, + [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))] +>; + +//===----------------------------------------------------------------------===// +// VTX Read from global memory space +//===----------------------------------------------------------------------===// + +// 8-bit reads +def VTX_READ_GLOBAL_8_cm : VTX_READ_8_cm <1, + [(set i32:$dst_gpr, (zextloadi8_global ADDRVTX_READ:$src_gpr))] +>; + +// 32-bit reads +def VTX_READ_GLOBAL_32_cm : VTX_READ_32_cm <1, + [(set i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + +// 128-bit reads +def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1, + [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))] +>; + } // End isCayman //===----------------------------------------------------------------------===// @@ -1755,7 +1873,7 @@ def CONST_COPY : Instruction { def TEX_VTX_CONSTBUF : InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr", [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>, - VTX_WORD1_GPR, VTX_WORD0 { + VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; @@ -1809,7 +1927,7 @@ def TEX_VTX_CONSTBUF : def TEX_VTX_TEXBUF: InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr", [(set v4f32:$dst, (int_R600_load_texbuf ADDRGA_VAR_OFFSET:$ptr, imm:$BUFFER_ID))]>, -VTX_WORD1_GPR, VTX_WORD0 { +VTX_WORD1_GPR, VTX_WORD0_eg { let VC_INST = 0; let FETCH_TYPE = 2; diff --git a/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll b/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll new file mode 100644 index 000000000000..7ea7a5c079cf --- /dev/null +++ b/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s +; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s + +; NI-CHECK: @vtx_fetch32 +; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00 +; CM-CHECK: @vtx_fetch32 +; CM-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00 + +define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %0 = load i32 addrspace(1)* %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; NI-CHECK: @vtx_fetch128 +; NI-CHECK: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00 +; XXX: Add a case for Cayman when v4i32 stores are supported. + +define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { +entry: + %0 = load <4 x i32> addrspace(1)* %in + store <4 x i32> %0, <4 x i32> addrspace(1)* %out + ret void +}