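; File: llvm-project/llvm/test/CodeGen/RISCV/VentusGPGPU/builtin.ll
;
; (Apparently a code-viewer page header captured together with the file:
; "93 lines", "3.7 KiB", "LLVM". Kept as a comment so the IR still parses.)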

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s
; foo_ker: spir_kernel variant of the element-wise update
;   A[id] = A[id] + B[id]
; where id is the i32 returned by _Z13get_global_idj(0). Both pointers live in
; addrspace(1) and are declared align 4.
;
; What the autogenerated assertions below pin down for this function:
;   - the prologue adds 16 to both sp and tp, and the epilogue subtracts 16
;     from both again;
;   - ra, s0 and s1 are spilled/reloaded with scalar sw/lw at negative offsets
;     from sp;
;   - two words are loaded from 0(a0) and 4(a0) into s0/s1 before the call
;     (presumably the A and B pointers read from a kernel-argument buffer
;     addressed by a0 -- TODO confirm against the Ventus kernel ABI);
;   - the returned index is shifted left by 2 (i32 element scaling of the
;     getelementptr) and added to each scalar base with vadd.vx.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: foo_ker:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi tp, tp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s0, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s1, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset s0, 4
; VENTUS-NEXT: .cfi_offset s1, 0
; VENTUS-NEXT: lw s0, 0(a0)
; VENTUS-NEXT: lw s1, 4(a0)
; VENTUS-NEXT: vmv.s.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.s.x v1, zero
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: vadd.vx v2, v0, s1
; VENTUS-NEXT: vmv.x.s a0, v2
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
; VENTUS-NEXT: vadd.vx v0, v0, s0
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vsuxei32.v v0, (a0), v1
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s1, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: ret
entry:
; Element index for this invocation; the callee's Itanium-mangled name decodes
; to get_global_id(unsigned int), and the argument is the constant 0.
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
; %0 = B[%call]  (B is only read: the parameter is marked readonly)
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
%0 = load i32, ptr addrspace(1) %arrayidx, align 4
; %1 = A[%call]
%arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
%1 = load i32, ptr addrspace(1) %arrayidx1, align 4
; A[%call] = %1 + %0, written back through the same address that was loaded.
%add = add nsw i32 %1, %0
store i32 %add, ptr addrspace(1) %arrayidx1, align 4
ret void
}
; foo_fun: the same element-wise update as a regular (non-kernel) function:
;   A[id] = A[id] + B[id]
; where id is the i32 returned by _Z13get_global_idj(0). It uses the default
; calling convention, and its pointer parameters carry no explicit align
; attribute.
;
; What the autogenerated assertions below pin down for this function:
;   - the prologue adds 16 to both sp and tp, and the epilogue subtracts 16
;     from both again;
;   - ra is spilled with scalar sw relative to sp, while v32 and v33 are
;     spilled with vsw (and reloaded with vlw) relative to tp;
;   - before the call, v1 is copied into v32 and v0 into v33 via vadd.vx with
;     zero (presumably A arrives in v0 and B in v1, and v32/v33 are
;     callee-saved vector registers -- TODO confirm against the Ventus ABI);
;   - the returned index is shifted left by 2 and added to each saved base
;     with vadd.vv.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
; VENTUS-LABEL: foo_fun:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi tp, tp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vsw v32, -12(tp) # 4-byte Folded Spill
; VENTUS-NEXT: vsw v33, -16(tp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v32.l, 4
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: vadd.vx v32, v1, zero
; VENTUS-NEXT: vadd.vx v33, v0, zero
; VENTUS-NEXT: vmv.s.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.s.x v1, zero
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: vadd.vv v2, v32, v0
; VENTUS-NEXT: vmv.x.s a0, v2
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v33, v0
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vsuxei32.v v0, (a0), v1
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vlw v32, -12(tp) # 4-byte Folded Reload
; VENTUS-NEXT: vlw v33, -16(tp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: ret
entry:
; Element index for this invocation; the callee's Itanium-mangled name decodes
; to get_global_id(unsigned int), and the argument is the constant 0.
%call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
; %0 = B[%call]  (B is only read: the parameter is marked readonly)
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
%0 = load i32, ptr addrspace(1) %arrayidx, align 4
; %1 = A[%call]
%arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
%1 = load i32, ptr addrspace(1) %arrayidx1, align 4
; A[%call] = %1 + %0, written back through the same address that was loaded.
%add = add nsw i32 %1, %0
store i32 %add, ptr addrspace(1) %arrayidx1, align 4
ret void
}
; External callee used by both functions in this file; only declared here, so
; the expected output contains a real `call` to it. The Itanium-mangled name
; decodes to get_global_id(unsigned int).
declare dso_local i32 @_Z13get_global_idj(i32 noundef)