GlobalISel: Add and_trivial_mask to all_combines
Also make up a new category of combines.
This commit is contained in:
		
							parent
							
								
									4ef9275b9b
								
							
						
					
					
						commit
						201f770f16
					
				| 
						 | 
				
			
			@ -357,6 +357,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
 | 
			
		|||
                                        binop_right_to_zero, p2i_to_i2p,
 | 
			
		||||
                                        i2p_to_p2i]>;
 | 
			
		||||
 | 
			
		||||
def known_bits_simplifications : GICombineGroup<[and_trivial_mask]>;
 | 
			
		||||
 | 
			
		||||
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 | 
			
		||||
 | 
			
		||||
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
 | 
			
		||||
| 
						 | 
				
			
			@ -367,4 +369,5 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
 | 
			
		|||
    identity_combines, simplify_add_to_sub,
 | 
			
		||||
    hoist_logic_op_with_same_opcode_hands,
 | 
			
		||||
    shl_ashr_to_sext_inreg, sext_inreg_of_load,
 | 
			
		||||
    width_reduction_combines, select_combines]>;
 | 
			
		||||
    width_reduction_combines, select_combines,
 | 
			
		||||
    known_bits_simplifications]>;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -159,11 +159,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_splat:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s1, 0xffc0
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s3
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s2, s2, s1
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -186,10 +185,9 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_lo:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s2, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s0, s0, 0xffc0
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s1, s1, 4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -212,10 +210,9 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_add_v2i16_neg_inline_imm_hi:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s2, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s0, s0, 4
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s1, s1, 0xffc0
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -239,13 +236,11 @@ define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_add_v2i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s3
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s2, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -271,13 +266,11 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
 | 
			
		|||
; GFX8-LABEL: s_add_v2i16_fneg_lhs:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_xor_b32 s0, s0, 0x80008000
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s3
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s2, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -305,13 +298,11 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
 | 
			
		|||
; GFX8-LABEL: s_add_v2i16_fneg_rhs:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_xor_b32 s1, s1, 0x80008000
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s3
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s2, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -343,13 +334,11 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
 | 
			
		|||
; GFX8-NEXT:    s_mov_b32 s2, 0x80008000
 | 
			
		||||
; GFX8-NEXT:    s_xor_b32 s1, s1, s2
 | 
			
		||||
; GFX8-NEXT:    s_xor_b32 s0, s0, s2
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s3
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_add_i32 s2, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s2, 16
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -47,8 +47,8 @@ define i8 @v_ashr_i8_7(i8 %value) {
 | 
			
		|||
; GFX9-LABEL: v_ashr_i8_7:
 | 
			
		||||
; GFX9:       ; %bb.0:
 | 
			
		||||
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 | 
			
		||||
; GFX9-NEXT:    s_mov_b32 s4, 7
 | 
			
		||||
; GFX9-NEXT:    v_ashrrev_i16_sdwa v0, s4, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 | 
			
		||||
; GFX9-NEXT:    v_mov_b32_e32 v1, 7
 | 
			
		||||
; GFX9-NEXT:    v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 | 
			
		||||
; GFX9-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
  %result = ashr i8 %value, 7
 | 
			
		||||
  ret i8 %result
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -514,7 +514,6 @@ define i64 @v_bswap_i48(i64 %src) {
 | 
			
		|||
; GFX7-NEXT:    v_alignbit_b32 v0, v0, v0, 24
 | 
			
		||||
; GFX7-NEXT:    v_bfi_b32 v2, s4, v0, v2
 | 
			
		||||
; GFX7-NEXT:    v_lshr_b64 v[0:1], v[1:2], 16
 | 
			
		||||
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 | 
			
		||||
; GFX7-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: v_bswap_i48:
 | 
			
		||||
| 
						 | 
				
			
			@ -524,7 +523,6 @@ define i64 @v_bswap_i48(i64 %src) {
 | 
			
		|||
; GFX8-NEXT:    v_perm_b32 v1, 0, v1, s4
 | 
			
		||||
; GFX8-NEXT:    v_perm_b32 v2, 0, v0, s4
 | 
			
		||||
; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 16, v[1:2]
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 | 
			
		||||
; GFX8-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX9-LABEL: v_bswap_i48:
 | 
			
		||||
| 
						 | 
				
			
			@ -534,7 +532,6 @@ define i64 @v_bswap_i48(i64 %src) {
 | 
			
		|||
; GFX9-NEXT:    v_perm_b32 v1, 0, v1, s4
 | 
			
		||||
; GFX9-NEXT:    v_perm_b32 v2, 0, v0, s4
 | 
			
		||||
; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 16, v[1:2]
 | 
			
		||||
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 | 
			
		||||
; GFX9-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
  %trunc = trunc i64 %src to i48
 | 
			
		||||
  %bswap = call i48 @llvm.bswap.i48(i48 %trunc)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -224,30 +224,28 @@ define <4 x float> @v_uitofp_v4i8_to_v4f32(i32 %arg0) nounwind {
 | 
			
		|||
; SI-LABEL: v_uitofp_v4i8_to_v4f32:
 | 
			
		||||
; SI:       ; %bb.0:
 | 
			
		||||
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 | 
			
		||||
; SI-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
 | 
			
		||||
; SI-NEXT:    s_movk_i32 s4, 0xff
 | 
			
		||||
; SI-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
 | 
			
		||||
; SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
 | 
			
		||||
; SI-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
 | 
			
		||||
; SI-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; SI-NEXT:    v_and_b32_e32 v3, s4, v0
 | 
			
		||||
; SI-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; SI-NEXT:    v_and_b32_e32 v2, s4, v2
 | 
			
		||||
; SI-NEXT:    v_and_b32_e32 v3, s4, v3
 | 
			
		||||
; SI-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
 | 
			
		||||
; SI-NEXT:    v_cvt_f32_ubyte0_e32 v4, v3
 | 
			
		||||
; SI-NEXT:    v_cvt_f32_ubyte3_e32 v3, v0
 | 
			
		||||
; SI-NEXT:    v_cvt_f32_ubyte0_e32 v1, v1
 | 
			
		||||
; SI-NEXT:    v_cvt_f32_ubyte0_e32 v2, v2
 | 
			
		||||
; SI-NEXT:    v_cvt_f32_ubyte0_e32 v3, v3
 | 
			
		||||
; SI-NEXT:    v_mov_b32_e32 v0, v4
 | 
			
		||||
; SI-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; VI-LABEL: v_uitofp_v4i8_to_v4f32:
 | 
			
		||||
; VI:       ; %bb.0:
 | 
			
		||||
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 | 
			
		||||
; VI-NEXT:    s_movk_i32 s4, 0xff
 | 
			
		||||
; VI-NEXT:    v_mov_b32_e32 v3, s4
 | 
			
		||||
; VI-NEXT:    v_mov_b32_e32 v2, s4
 | 
			
		||||
; VI-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
 | 
			
		||||
; VI-NEXT:    v_and_b32_sdwa v2, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 | 
			
		||||
; VI-NEXT:    v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 | 
			
		||||
; VI-NEXT:    v_cvt_f32_ubyte0_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
 | 
			
		||||
; VI-NEXT:    v_and_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
 | 
			
		||||
; VI-NEXT:    v_cvt_f32_ubyte0_e32 v3, v0
 | 
			
		||||
; VI-NEXT:    v_cvt_f32_ubyte3_e32 v3, v0
 | 
			
		||||
; VI-NEXT:    v_cvt_f32_ubyte0_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0
 | 
			
		||||
; VI-NEXT:    v_cvt_f32_ubyte0_e32 v2, v2
 | 
			
		||||
; VI-NEXT:    v_mov_b32_e32 v0, v4
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| 
						 | 
				
			
			@ -46,8 +46,8 @@ define i8 @v_lshr_i8_7(i8 %value) {
 | 
			
		|||
; GFX9-LABEL: v_lshr_i8_7:
 | 
			
		||||
; GFX9:       ; %bb.0:
 | 
			
		||||
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 | 
			
		||||
; GFX9-NEXT:    s_mov_b32 s4, 7
 | 
			
		||||
; GFX9-NEXT:    v_lshrrev_b16_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 | 
			
		||||
; GFX9-NEXT:    v_mov_b32_e32 v1, 7
 | 
			
		||||
; GFX9-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
 | 
			
		||||
; GFX9-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
  %result = lshr i8 %value, 7
 | 
			
		||||
  ret i8 %result
 | 
			
		||||
| 
						 | 
				
			
			@ -557,13 +557,11 @@ define amdgpu_ps i32 @s_lshr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_lshr_v2i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s3
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s1, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -740,21 +738,17 @@ define amdgpu_ps <2 x i32> @s_lshr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_lshr_v4i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s7, s7, s6
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s0, s0, s2
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s4, s7
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s3, s3, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s5, s5, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s8, s8, s6
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s3, s5, s8
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -932,39 +926,31 @@ define amdgpu_ps <4 x i32> @s_lshr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_lshr_v8i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s12, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s13, s4, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s14, s5, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s8, s8, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s13, s13, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s15, s6, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s0, s0, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s8, s13
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s14, s5, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s5, s5, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s9, s9, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s14, s14, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s16, s7, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s1, s1, s5
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s15, s6, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s6, s6, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s10, s10, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s15, s15, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s5, s9, s14
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s4, s4, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s2, s6
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s16, s7, 16
 | 
			
		||||
; GFX8-NEXT:    s_or_b32 s0, s4, s0
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s3, s3, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s7, s7, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s11, s11, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s16, s16, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s6, s10, s15
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s4, s5, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s12
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,22 @@
 | 
			
		|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 | 
			
		||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
name:  remove_and_255_zextload
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $vgpr0_vgpr1
 | 
			
		||||
    ; CHECK-LABEL: name: remove_and_255_zextload
 | 
			
		||||
    ; CHECK: liveins: $vgpr0_vgpr1
 | 
			
		||||
    ; CHECK: %ptr:_(p1) = COPY $vgpr0_vgpr1
 | 
			
		||||
    ; CHECK: %load:_(s32) = G_ZEXTLOAD %ptr(p1) :: (load 1, addrspace 1)
 | 
			
		||||
    ; CHECK: $vgpr0 = COPY %load(s32)
 | 
			
		||||
    %ptr:_(p1) = COPY $vgpr0_vgpr1
 | 
			
		||||
    %load:_(s32) = G_ZEXTLOAD %ptr :: (load 1, addrspace 1, align 1)
 | 
			
		||||
    %mask:_(s32) = G_CONSTANT i32 255
 | 
			
		||||
    %and:_(s32) = G_AND %load, %mask
 | 
			
		||||
    $vgpr0 = COPY %and
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
| 
						 | 
				
			
			@ -461,7 +461,6 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
 | 
			
		|||
; GFX7-NEXT:    s_and_b32 s0, s0, 0x3fff3fff
 | 
			
		||||
; GFX7-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX7-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX7-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX7-NEXT:    s_lshl_b32 s0, s0, 2
 | 
			
		||||
; GFX7-NEXT:    s_lshl_b32 s1, s1, 2
 | 
			
		||||
; GFX7-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -473,7 +472,6 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
 | 
			
		|||
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, s2
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s5, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 | 
			
		||||
| 
						 | 
				
			
			@ -485,9 +483,7 @@ define amdgpu_ps <2 x i32> @s_shl_v2i32_zext_v2i16(<2 x i16> inreg %x) {
 | 
			
		|||
; GFX9:       ; %bb.0:
 | 
			
		||||
; GFX9-NEXT:    s_and_b32 s0, s0, 0x3fff3fff
 | 
			
		||||
; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX9-NEXT:    s_mov_b32 s2, 0xffff
 | 
			
		||||
; GFX9-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX9-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX9-NEXT:    s_and_b32 s0, s0, 0xffff
 | 
			
		||||
; GFX9-NEXT:    s_lshl_b32 s0, s0, 2
 | 
			
		||||
; GFX9-NEXT:    s_lshl_b32 s1, s1, 2
 | 
			
		||||
; GFX9-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -506,14 +502,13 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
 | 
			
		|||
; GFX7-LABEL: v_shl_v2i32_zext_v2i16:
 | 
			
		||||
; GFX7:       ; %bb.0:
 | 
			
		||||
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 | 
			
		||||
; GFX7-NEXT:    v_mov_b32_e32 v2, 0xffff
 | 
			
		||||
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 | 
			
		||||
; GFX7-NEXT:    v_and_b32_e32 v0, v0, v2
 | 
			
		||||
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
 | 
			
		||||
; GFX7-NEXT:    v_and_b32_e32 v0, 0x3fff3fff, v0
 | 
			
		||||
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
 | 
			
		||||
; GFX7-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX7-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX7-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX7-NEXT:    v_and_b32_e32 v0, v0, v2
 | 
			
		||||
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 | 
			
		||||
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 | 
			
		||||
; GFX7-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
| 
						 | 
				
			
			@ -521,24 +516,19 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
 | 
			
		|||
; GFX8-LABEL: v_shl_v2i32_zext_v2i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_e32 v0, 0x3fff3fff, v0
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v1, s4
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_e32 v1, 0x3fff3fff, v0
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v2, 2
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 | 
			
		||||
; GFX8-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX9-LABEL: v_shl_v2i32_zext_v2i16:
 | 
			
		||||
; GFX9:       ; %bb.0:
 | 
			
		||||
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 | 
			
		||||
; GFX9-NEXT:    v_and_b32_e32 v0, 0x3fff3fff, v0
 | 
			
		||||
; GFX9-NEXT:    s_mov_b32 s5, 0xffff
 | 
			
		||||
; GFX9-NEXT:    v_and_b32_sdwa v1, v0, s5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 | 
			
		||||
; GFX9-NEXT:    s_mov_b32 s4, 2
 | 
			
		||||
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 | 
			
		||||
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v1
 | 
			
		||||
; GFX9-NEXT:    v_and_b32_e32 v1, 0x3fff3fff, v0
 | 
			
		||||
; GFX9-NEXT:    v_lshlrev_b32_sdwa v0, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 | 
			
		||||
; GFX9-NEXT:    v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 | 
			
		||||
; GFX9-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
  %and = and <2 x i16> %x, <i16 16383, i16 16383>
 | 
			
		||||
  %ext = zext <2 x i16> %and to <2 x i32>
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -551,13 +551,11 @@ define amdgpu_ps i32 @s_shl_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amoun
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_shl_v2i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s3
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s3
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -722,21 +720,17 @@ define amdgpu_ps <2 x i32> @s_shl_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_shl_v4i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s6, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s7, s2, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s7, s7, s6
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s0, s0, s2
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s2, s4, s7
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s3, s3, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s5, s5, s6
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s8, s8, s6
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, s3
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s3, s5, s8
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -898,39 +892,31 @@ define amdgpu_ps <4 x i32> @s_shl_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
 | 
			
		|||
;
 | 
			
		||||
; GFX8-LABEL: s_shl_v8i16:
 | 
			
		||||
; GFX8:       ; %bb.0:
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s12, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s8, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s13, s4, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s14, s5, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s4, s4, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s8, s8, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s13, s13, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s15, s6, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s0, s0, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s4, s8, s13
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s9, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s14, s5, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s5, s5, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s9, s9, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s14, s14, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s16, s7, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, s5
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s10, s2, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s15, s6, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s6, s6, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s10, s10, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s15, s15, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s5, s9, s14
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s4, s4, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s2, s2, s6
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s11, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s16, s7, 16
 | 
			
		||||
; GFX8-NEXT:    s_or_b32 s0, s4, s0
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s3, s3, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s7, s7, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s11, s11, s12
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s16, s16, s12
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s6, s10, s15
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s4, s5, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s12
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -205,10 +205,7 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v2, v3, v2
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    v_mov_b32_e32 v2, 0xff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, v0, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
| 
						 | 
				
			
			@ -223,10 +220,9 @@ define i16 @v_uaddsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 | 
			
		|||
; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
 | 
			
		||||
; GFX8-NEXT:    v_add_u16_e64 v1, v3, v2 clamp
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v2, 0xff
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX9-LABEL: v_uaddsat_v2i8:
 | 
			
		||||
| 
						 | 
				
			
			@ -291,10 +287,7 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s3, s2
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s2, s3, s2
 | 
			
		||||
; GFX6-NEXT:    s_add_i32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_movk_i32 s2, 0xff
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s1, s1, 24
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -311,11 +304,10 @@ define amdgpu_ps i16 @s_uaddsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 | 
			
		|||
; GFX8-NEXT:    v_add_u16_e64 v0, s0, v0 clamp
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s0, s2, s4
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v2, 0xff
 | 
			
		||||
; GFX8-NEXT:    v_add_u16_e64 v1, s0, v1 clamp
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
 | 
			
		||||
; GFX8-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			@ -399,24 +391,19 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v3, v5, v3
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    s_movk_i32 s4, 0xff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 24, v7
 | 
			
		||||
; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v4, v5, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 24, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			@ -565,17 +552,12 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s5, s4
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s4, s5, s4
 | 
			
		||||
; GFX6-NEXT:    s_add_i32 s3, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_movk_i32 s4, 0xff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s4
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s3, s3, 24
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 24
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			@ -1892,10 +1874,7 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s3, s2
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s2, s3, s2
 | 
			
		||||
; GFX6-NEXT:    s_add_i32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -1946,10 +1925,7 @@ define amdgpu_ps float @uaddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v1, s1, v1
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s0, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s0, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -1994,10 +1970,7 @@ define amdgpu_ps float @uaddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v2, s0, v2
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s0, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s0, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -2063,19 +2036,14 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v3
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v4, v5, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: v_uaddsat_v4i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2142,16 +2110,11 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s5, s4
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s4, s5, s4
 | 
			
		||||
; GFX6-NEXT:    s_add_i32 s3, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s4
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s2, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: s_uaddsat_v4i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2241,29 +2204,22 @@ define <3 x float> @v_uaddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v6, v7, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v11
 | 
			
		||||
; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v6, v7, v6
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: v_uaddsat_v6i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2351,20 +2307,13 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s7, s6
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s6, s7, s6
 | 
			
		||||
; GFX6-NEXT:    s_add_i32 s5, s5, s6
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s6, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s6
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s6
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s3, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s5, s5, 16
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s3, s5, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s4, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s2, s3
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s2, s1
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s4, s2
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: s_uaddsat_v6i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2466,36 +2415,27 @@ define <4 x float> @v_uaddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_xor_b32_e32 v9, -1, v6
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v8, v9, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v15
 | 
			
		||||
; GFX6-NEXT:    v_xor_b32_e32 v9, -1, v7
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v8, v9, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v4
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v7
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v3, v6, v3
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: v_uaddsat_v8i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2603,24 +2543,15 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s9, s8
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s8, s9, s8
 | 
			
		||||
; GFX6-NEXT:    s_add_i32 s7, s7, s8
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s8, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s3, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s3, s5, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s7, s7, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s4, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s4, s7, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s2, s3
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s3, s6, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s3, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s3, s7, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s2, s1
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s4, s2
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s3, s6, s3
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: s_uaddsat_v8i16:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -199,10 +199,7 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v2, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    v_mov_b32_e32 v2, 0xff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, v0, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
| 
						 | 
				
			
			@ -217,10 +214,9 @@ define i16 @v_usubsat_v2i8(i16 %lhs.arg, i16 %rhs.arg) {
 | 
			
		|||
; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v1 clamp
 | 
			
		||||
; GFX8-NEXT:    v_sub_u16_e64 v1, v3, v2 clamp
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v2, 0xff
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX9-LABEL: v_usubsat_v2i8:
 | 
			
		||||
| 
						 | 
				
			
			@ -283,10 +279,7 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s2, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_sub_i32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_movk_i32 s2, 0xff
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s1, s1, 24
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -303,11 +296,10 @@ define amdgpu_ps i16 @s_usubsat_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg) {
 | 
			
		|||
; GFX8-NEXT:    v_sub_u16_e64 v0, s0, v0 clamp
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s0, s2, s4
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
 | 
			
		||||
; GFX8-NEXT:    v_mov_b32_e32 v2, 0xff
 | 
			
		||||
; GFX8-NEXT:    v_sub_u16_e64 v1, s0, v1 clamp
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_and_b32_sdwa v1, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshrrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
 | 
			
		||||
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
 | 
			
		||||
; GFX8-NEXT:    v_readfirstlane_b32 s0, v0
 | 
			
		||||
; GFX8-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			@ -386,25 +378,20 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 24, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v6
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v3, v2, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    s_movk_i32 s4, 0xff
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 24, v7
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v4, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 24, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			@ -549,17 +536,12 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s4, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_sub_i32 s3, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_movk_i32 s4, 0xff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s4
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s3, s3, 24
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 24
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 24
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			@ -1802,10 +1784,7 @@ define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s2, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_sub_i32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s2, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -1854,10 +1833,7 @@ define amdgpu_ps float @usubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v1, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s0, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s0, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -1900,10 +1876,7 @@ define amdgpu_ps float @usubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_min_u32_e32 v2, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s0, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s0, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
| 
						 | 
				
			
			@ -1965,19 +1938,14 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v4, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: v_usubsat_v4i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2040,16 +2008,11 @@ define amdgpu_ps <2 x i32> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s4, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_sub_i32 s3, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s4
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s2, s1
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: s_usubsat_v4i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2133,29 +2096,22 @@ define <3 x float> @v_usubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v10
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v6, v4, v6
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v11
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v6, v5, v6
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: v_usubsat_v6i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2237,20 +2193,13 @@ define amdgpu_ps <3 x i32> @s_usubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s5, s6
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s6, s5, s6
 | 
			
		||||
; GFX6-NEXT:    s_sub_i32 s5, s5, s6
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s6, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s6
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s6
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s3, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s5, s5, 16
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s3, s5, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s4, s6
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s2, s3
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s2, s1
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s4, s2
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: s_usubsat_v6i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2344,36 +2293,27 @@ define <4 x float> @v_usubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 | 
			
		|||
; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v14
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v8, v6, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v15
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_min_u32_e32 v8, v7, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v8
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v4
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v7
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
 | 
			
		||||
; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
 | 
			
		||||
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v6
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
 | 
			
		||||
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
 | 
			
		||||
; GFX6-NEXT:    v_or_b32_e32 v3, v6, v3
 | 
			
		||||
; GFX6-NEXT:    s_setpc_b64 s[30:31]
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: v_usubsat_v8i16:
 | 
			
		||||
| 
						 | 
				
			
			@ -2473,24 +2413,15 @@ define amdgpu_ps <4 x i32> @s_usubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 | 
			
		|||
; GFX6-NEXT:    s_cmp_lt_u32 s7, s8
 | 
			
		||||
; GFX6-NEXT:    s_cselect_b32 s8, s7, s8
 | 
			
		||||
; GFX6-NEXT:    s_sub_i32 s7, s7, s8
 | 
			
		||||
; GFX6-NEXT:    s_mov_b32 s8, 0xffff
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s1, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s0, s0, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s1, s2, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s3, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s3, s5, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_lshr_b32 s7, s7, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s1, s2
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s2, s4, s8
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s4, s7, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s2, s3
 | 
			
		||||
; GFX6-NEXT:    s_and_b32 s3, s6, s8
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s3, s3, s4
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s3, s7, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s1, s2, s1
 | 
			
		||||
; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s2, s4, s2
 | 
			
		||||
; GFX6-NEXT:    s_or_b32 s3, s6, s3
 | 
			
		||||
; GFX6-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
; GFX8-LABEL: s_usubsat_v8i16:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,7 +37,6 @@ define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> in
 | 
			
		|||
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s3, s2
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s2
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s1, s2
 | 
			
		||||
; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
 | 
			
		||||
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
| 
						 | 
				
			
			@ -118,21 +117,19 @@ define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> in
 | 
			
		|||
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s4, 0xffff
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s5, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s0, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s1, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s5, s4
 | 
			
		||||
; GFX8-NEXT:    s_mov_b32 s5, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s3, s3, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
 | 
			
		||||
; GFX8-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
 | 
			
		||||
; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s6, s1, s4
 | 
			
		||||
; GFX8-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
 | 
			
		||||
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
 | 
			
		||||
; GFX8-NEXT:    s_and_b64 s[2:3], s[6:7], s[4:5]
 | 
			
		||||
; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
 | 
			
		||||
; GFX8-NEXT:    s_xor_b64 s[6:7], s[0:1], s[4:5]
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s1, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s0, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_or_b32 s0, s0, s1
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s7, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s6, s4
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s0, s0, s4
 | 
			
		||||
; GFX8-NEXT:    s_or_b32 s0, s1, s0
 | 
			
		||||
; GFX8-NEXT:    s_lshl_b32 s1, s3, 16
 | 
			
		||||
; GFX8-NEXT:    s_and_b32 s2, s2, s4
 | 
			
		||||
; GFX8-NEXT:    s_or_b32 s1, s1, s2
 | 
			
		||||
; GFX8-NEXT:    ; return to shader part epilog
 | 
			
		||||
;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue