; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mcpu=ventus-gpgpu -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=VENTUS %s

; foo_ker -- kernel entry point (spir_kernel calling convention) computing
;   A[gid] = A[gid] + B[gid],  where gid = get_global_id(0).
;
; What the expected assembly below pins down:
;   * the prologue adds 16 to both sp and tp, the epilogue subtracts 16 from
;     both;
;   * ra, s0 and s1 are saved to and restored from sp-relative slots;
;   * two words are loaded from 0(a0) and 4(a0) into s0 and s1 before the
;     call and are then used in the address computation of the two memory
;     accesses (presumably a0 points at the kernel-argument buffer -- confirm
;     against the Ventus ABI);
;   * although the IR call is marked 'tail', the expected output is a plain
;     'call' followed by the rest of the body.
;
; The assertion lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define spir_kernel void @foo_ker(ptr addrspace(1) nocapture noundef align 4 %A, ptr addrspace(1) nocapture noundef readonly align 4 %B) {
; VENTUS-LABEL: foo_ker:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi tp, tp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s0, -12(sp) # 4-byte Folded Spill
; VENTUS-NEXT: sw s1, -16(sp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset s0, 4
; VENTUS-NEXT: .cfi_offset s1, 0
; VENTUS-NEXT: lw s0, 0(a0)
; VENTUS-NEXT: lw s1, 4(a0)
; VENTUS-NEXT: vmv.s.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.s.x v1, zero
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: vadd.vx v2, v0, s1
; VENTUS-NEXT: vmv.x.s a0, v2
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
; VENTUS-NEXT: vadd.vx v0, v0, s0
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vsuxei32.v v0, (a0), v1
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s0, -12(sp) # 4-byte Folded Reload
; VENTUS-NEXT: lw s1, -16(sp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: ret
entry:
  ; gid = get_global_id(0)
  %call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
  ; %0 = B[gid]
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
  ; %1 = A[gid]
  %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
  %1 = load i32, ptr addrspace(1) %arrayidx1, align 4
  ; A[gid] = %1 + %0
  %add = add nsw i32 %1, %0
  store i32 %add, ptr addrspace(1) %arrayidx1, align 4
  ret void
}

; foo_fun -- ordinary (non-kernel) function with the same IR body as the
; kernel @foo_ker:
;   A[gid] = A[gid] + B[gid],  where gid = get_global_id(0).
;
; What the expected assembly below pins down:
;   * the prologue adds 16 to both sp and tp, the epilogue subtracts 16 from
;     both;
;   * ra is saved with 'sw' to an sp-relative slot, while v32 and v33 are
;     saved with 'vsw' to tp-relative slots (and reloaded with 'lw' / 'vlw');
;   * the incoming v1 and v0 are copied into v32 and v33 before the call and
;     serve afterwards as the base addresses of the two memory accesses, so
;     the pointer arguments appear to be passed in v0 and v1 -- confirm
;     against the Ventus ABI;
;   * although the IR call is marked 'tail', the expected output is a plain
;     'call' followed by the rest of the body.
;
; The assertion lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local void @foo_fun(ptr addrspace(1) nocapture noundef %A, ptr addrspace(1) nocapture noundef readonly %B) {
; VENTUS-LABEL: foo_fun:
; VENTUS: # %bb.0: # %entry
; VENTUS-NEXT: addi sp, sp, 16
; VENTUS-NEXT: addi tp, tp, 16
; VENTUS-NEXT: .cfi_def_cfa_offset 16
; VENTUS-NEXT: sw ra, -8(sp) # 4-byte Folded Spill
; VENTUS-NEXT: vsw v32, -12(tp) # 4-byte Folded Spill
; VENTUS-NEXT: vsw v33, -16(tp) # 4-byte Folded Spill
; VENTUS-NEXT: .cfi_offset ra, 8
; VENTUS-NEXT: .cfi_offset v32.l, 4
; VENTUS-NEXT: .cfi_offset v33.l, 0
; VENTUS-NEXT: vadd.vx v32, v1, zero
; VENTUS-NEXT: vadd.vx v33, v0, zero
; VENTUS-NEXT: vmv.s.x v0, zero
; VENTUS-NEXT: call _Z13get_global_idj
; VENTUS-NEXT: vmv.s.x v1, zero
; VENTUS-NEXT: vsll.vi v0, v0, 2
; VENTUS-NEXT: vadd.vv v2, v32, v0
; VENTUS-NEXT: vmv.x.s a0, v2
; VENTUS-NEXT: vluxei32.v v2, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v33, v0
; VENTUS-NEXT: vmv.x.s a0, v0
; VENTUS-NEXT: vluxei32.v v0, (a0), v1
; VENTUS-NEXT: vadd.vv v0, v0, v2
; VENTUS-NEXT: vsuxei32.v v0, (a0), v1
; VENTUS-NEXT: lw ra, -8(sp) # 4-byte Folded Reload
; VENTUS-NEXT: vlw v32, -12(tp) # 4-byte Folded Reload
; VENTUS-NEXT: vlw v33, -16(tp) # 4-byte Folded Reload
; VENTUS-NEXT: addi sp, sp, -16
; VENTUS-NEXT: addi tp, tp, -16
; VENTUS-NEXT: ret
entry:
  ; gid = get_global_id(0)
  %call = tail call i32 @_Z13get_global_idj(i32 noundef 0)
  ; %0 = B[gid]
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %B, i32 %call
  %0 = load i32, ptr addrspace(1) %arrayidx, align 4
  ; %1 = A[gid]
  %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %A, i32 %call
  %1 = load i32, ptr addrspace(1) %arrayidx1, align 4
  ; A[gid] = %1 + %0
  %add = add nsw i32 %1, %0
  store i32 %add, ptr addrspace(1) %arrayidx1, align 4
  ret void
}

; External declaration only; no body is provided in this file. The Itanium
; mangled name decodes to get_global_id(unsigned int). Both @foo_ker and
; @foo_fun call it with the constant argument 0 and use the i32 result as an
; element index.
declare dso_local i32 @_Z13get_global_idj(i32 noundef)