Files
clang-p2996/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
Jay Foad f2c164c815 [AMDGPU] Do not wait for vscnt on function entry and return
SIInsertWaitcnts inserts waitcnt instructions to resolve data
dependencies. The GFX10+ vscnt (VMEM store count) counter is never used
in this way. It is only used to resolve memory dependencies, and that is
handled by SIMemoryLegalizer. Hence there is no need to conservatively
wait for vscnt to be 0 on function entry and before returns.

Differential Revision: https://reviews.llvm.org/D153537
2023-07-04 12:22:38 +01:00

862 lines
36 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck --check-prefixes=OPT,OPT-GFX7 %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck --check-prefixes=OPT,OPT-GFX8 %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck --check-prefixes=OPT,OPT-GFX9 %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=gfx1030 < %s | FileCheck --check-prefixes=OPT,OPT-GFX10 %s
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefix=GFX10 %s
; Conditional flat load through a constant-offset GEP whose definition and use
; sit in different blocks: %in.gep (i32 index 7, i.e. byte offset 28) is
; computed in %entry but only loaded from in %if.
;
; What the assertions below pin down:
;  - codegenprepare output for bonaire and tonga keeps the GEP in %entry; for
;    gfx900 and gfx1030 the address is rebuilt inside %if as an i8 GEP of 28
;    (the SUNKADDR value).
;  - llc output for bonaire and tonga adds 28 with an explicit 64-bit add ahead
;    of flat_load_dword; gfx900 and gfx1030 put the 28 in the load's immediate
;    offset field instead.
;  - %out.gep (i32 index 999999, byte offset 0x3d08fc) stays in %entry in every
;    opt output. In the asm it is one full 64-bit add on bonaire and tonga, and
;    a base add plus an immediate store offset on gfx900 (0x3d0000 + 2300) and
;    gfx1030 (0x3d0800 + 252).
define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX7-NEXT: entry:
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7: if:
; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
; OPT-GFX7-NEXT: br label [[ENDIF]]
; OPT-GFX7: endif:
; OPT-GFX7-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX7-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX7-NEXT: br label [[DONE:%.*]]
; OPT-GFX7: done:
; OPT-GFX7-NEXT: ret void
;
; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX8-NEXT: entry:
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8: if:
; OPT-GFX8-NEXT: [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
; OPT-GFX8-NEXT: br label [[ENDIF]]
; OPT-GFX8: endif:
; OPT-GFX8-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX8-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX8-NEXT: br label [[DONE:%.*]]
; OPT-GFX8: done:
; OPT-GFX8-NEXT: ret void
;
; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX9-NEXT: entry:
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX9-NEXT: br label [[ENDIF]]
; OPT-GFX9: endif:
; OPT-GFX9-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX9-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX9-NEXT: ret void
;
; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX10-NEXT: entry:
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10: if:
; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX10-NEXT: br label [[ENDIF]]
; OPT-GFX10: endif:
; OPT-GFX10-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX10-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX10-NEXT: ret void
;
; GFX7-LABEL: test_sinkable_flat_small_offset_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT: s_cbranch_execz .LBB0_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 28, v2
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT: flat_load_dword v4, v[2:3]
; GFX7-NEXT: .LBB0_2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v4
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sinkable_flat_small_offset_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT: s_cbranch_execz .LBB0_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_dword v4, v[2:3]
; GFX8-NEXT: .LBB0_2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v4
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sinkable_flat_small_offset_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB0_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: flat_load_dword v4, v[2:3] offset:28
; GFX9-NEXT: .LBB0_2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2300
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sinkable_flat_small_offset_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB0_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: flat_load_dword v4, v[2:3] offset:28
; GFX10-NEXT: .LBB0_2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
; Both addresses are formed here; %in.gep is only consumed in %if and
; %out.gep only in %endif.
%out.gep = getelementptr i32, ptr %out, i64 999999
%in.gep = getelementptr i32, ptr %in, i64 7
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
if:
%load = load i32, ptr %in.gep
br label %endif
endif:
; 0 is stored when the load was skipped.
%x = phi i32 [ %load, %if ], [ 0, %entry ]
store i32 %x, ptr %out.gep
br label %done
done:
ret void
}
; Same control flow as the plain flat case, but the constant-offset GEP result
; is addrspacecast from a flat pointer to addrspace(1) in %entry and the load
; in %if goes through the cast pointer.
;
; What the assertions below pin down:
;  - codegenprepare output for bonaire, gfx900 and gfx1030 places the
;    addrspacecast in %if, applies it to the base %in, and follows it with an
;    i8 GEP of 28 in addrspace(1) (the SUNKADDR value).
;  - codegenprepare output for tonga leaves the i32 GEP in %entry and only has
;    the addrspacecast in %if.
;  - llc output for bonaire is a buffer_load_dword in addr64 mode with an
;    immediate offset of 28; tonga adds 28 explicitly and uses flat_load_dword;
;    gfx900 and gfx1030 use global_load_dword with an immediate offset of 28.
;  - The store through %out.gep is still a flat store on every target.
define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX7-NEXT: entry:
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7: if:
; OPT-GFX7-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX7-NEXT: br label [[ENDIF]]
; OPT-GFX7: endif:
; OPT-GFX7-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX7-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX7-NEXT: ret void
;
; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX8-NEXT: entry:
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8: if:
; OPT-GFX8-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN_GEP]] to ptr addrspace(1)
; OPT-GFX8-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[TMP0]], align 4
; OPT-GFX8-NEXT: br label [[ENDIF]]
; OPT-GFX8: endif:
; OPT-GFX8-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX8-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX8-NEXT: ret void
;
; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX9-NEXT: entry:
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
; OPT-GFX9-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX9-NEXT: br label [[ENDIF]]
; OPT-GFX9: endif:
; OPT-GFX9-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX9-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX9-NEXT: ret void
;
; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX10-NEXT: entry:
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10: if:
; OPT-GFX10-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX10-NEXT: br label [[ENDIF]]
; OPT-GFX10: endif:
; OPT-GFX10-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX10-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX10-NEXT: ret void
;
; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX7-NEXT: s_cbranch_execz .LBB1_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
; GFX7-NEXT: .LBB1_2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v4
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT: s_cbranch_execz .LBB1_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_dword v4, v[2:3]
; GFX8-NEXT: .LBB1_2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v4
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB1_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:28
; GFX9-NEXT: .LBB1_2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2300
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:28
; GFX10-NEXT: .LBB1_2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i64 999999
; The GEP and the cast are both formed here but %cast is only used in %if.
%in.gep = getelementptr i32, ptr %in, i64 7
%cast = addrspacecast ptr %in.gep to ptr addrspace(1)
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
if:
%load = load i32, ptr addrspace(1) %cast
br label %endif
endif:
; 0 is stored when the load was skipped.
%x = phi i32 [ %load, %if ], [ 0, %entry ]
store i32 %x, ptr %out.gep
br label %done
done:
ret void
}
; Variant of the flat-to-global case where the GEP result is addrspacecast to
; addrspace(4) instead, and the load in %if is an addrspace(4) load.
;
; What the assertions below pin down:
;  - A single shared set of opt assertions covers all four -mcpu values: the
;    addrspacecast is in %if, applied to the base %in, followed by an i8 GEP of
;    28 in addrspace(4) (the SUNKADDR value).
;  - llc output for bonaire is a buffer_load_dword in addr64 mode with an
;    immediate offset of 28; tonga adds 28 explicitly and uses flat_load_dword;
;    gfx900 and gfx1030 use global_load_dword with an immediate offset of 28.
;  - The store through %out.gep is a flat store on every target.
define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
; OPT-NEXT: entry:
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT: if:
; OPT-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
; OPT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
; OPT-NEXT: br label [[ENDIF]]
; OPT: endif:
; OPT-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-NEXT: ret void
;
; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX7-NEXT: s_cbranch_execz .LBB2_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
; GFX7-NEXT: .LBB2_2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v4
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT: s_cbranch_execz .LBB2_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_dword v4, v[2:3]
; GFX8-NEXT: .LBB2_2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v4
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB2_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:28
; GFX9-NEXT: .LBB2_2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2300
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB2_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:28
; GFX10-NEXT: .LBB2_2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i64 999999
; The GEP and the cast are both formed here but %cast is only used in %if.
%in.gep = getelementptr i32, ptr %in, i64 7
%cast = addrspacecast ptr %in.gep to ptr addrspace(4)
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
if:
%load = load i32, ptr addrspace(4) %cast
br label %endif
endif:
; 0 is stored when the load was skipped.
%x = phi i32 [ %load, %if ], [ 0, %entry ]
store i32 %x, ptr %out.gep
br label %done
done:
ret void
}
; Conditional sign-extending byte load from %in + 4095, with the GEP in %entry
; and the load in %if. The branch condition comes from
; llvm.amdgcn.mbcnt.lo(-1, 0) rather than from an argument.
;
; What the assertions below pin down:
;  - Only the gfx900 codegenprepare output rebuilds the address inside %if
;    (SUNKADDR = i8 GEP of 4095); bonaire, tonga and gfx1030 keep the GEP in
;    %entry.
;  - llc output for gfx900 carries the whole 4095 in the immediate offset of
;    flat_load_sbyte. gfx1030 splits it into an add of 0x800 plus an immediate
;    offset of 2047. bonaire and tonga add 0xfff and load with no offset.
;  - %out.gep (i32 index 1024, byte offset 0x1000) is an explicit 64-bit add
;    followed by a flat store with no immediate offset on all four targets.
; NOTE(review): the name suggests 4095 is the largest immediate flat offset on
; gfx900; this file only shows that 4095 is folded there and 4096 is not.
define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX7-NEXT: entry:
; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX7-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7: if:
; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-GFX7-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX7-NEXT: br label [[ENDIF]]
; OPT-GFX7: endif:
; OPT-GFX7-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX7-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX7-NEXT: br label [[DONE:%.*]]
; OPT-GFX7: done:
; OPT-GFX7-NEXT: ret void
;
; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX8-NEXT: entry:
; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX8-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8: if:
; OPT-GFX8-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-GFX8-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX8-NEXT: br label [[ENDIF]]
; OPT-GFX8: endif:
; OPT-GFX8-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX8-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX8-NEXT: br label [[DONE:%.*]]
; OPT-GFX8: done:
; OPT-GFX8-NEXT: ret void
;
; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX9-NEXT: entry:
; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX9-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
; OPT-GFX9-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX9-NEXT: br label [[ENDIF]]
; OPT-GFX9: endif:
; OPT-GFX9-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX9-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX9-NEXT: ret void
;
; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX10-NEXT: entry:
; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX10-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10: if:
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-GFX10-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX10-NEXT: br label [[ENDIF]]
; OPT-GFX10: endif:
; OPT-GFX10-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX10-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX10-NEXT: br label [[DONE:%.*]]
; OPT-GFX10: done:
; OPT-GFX10-NEXT: ret void
;
; GFX7-LABEL: test_sink_flat_small_max_flat_offset:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v5, -1, 0
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT: s_cbranch_execz .LBB3_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 0xfff, v2
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT: flat_load_sbyte v4, v[2:3]
; GFX7-NEXT: .LBB3_2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v4
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_flat_small_max_flat_offset:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT: s_cbranch_execz .LBB3_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xfff, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_sbyte v4, v[2:3]
; GFX8-NEXT: .LBB3_2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x1000, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v4
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_flat_small_max_flat_offset:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB3_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: flat_load_sbyte v4, v[2:3] offset:4095
; GFX9-NEXT: .LBB3_2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_dword v[0:1], v4
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_flat_small_max_flat_offset:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB3_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x800, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: flat_load_sbyte v4, v[2:3] offset:2047
; GFX10-NEXT: .LBB3_2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i32 1024
; Byte GEP formed here, consumed only by the load in %if.
%in.gep = getelementptr i8, ptr %in, i64 4095
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %endif, label %if
if:
%load = load i8, ptr %in.gep
%cast = sext i8 %load to i32
br label %endif
endif:
; 0 is stored when the load was skipped.
%x = phi i32 [ %cast, %if ], [ 0, %entry ]
store i32 %x, ptr %out.gep
br label %done
done:
ret void
}
; Companion to the 4095-byte case with the byte offset raised to 4096. The GEP
; is in %entry, the sign-extending byte load is in %if, and the branch
; condition again comes from llvm.amdgcn.mbcnt.lo(-1, 0).
;
; What the assertions below pin down:
;  - A single shared set of opt assertions covers all four -mcpu values and
;    shows the GEP left in %entry (no SUNKADDR is created for any target).
;  - llc output on every target adds 0x1000 with an explicit 64-bit add and
;    then issues flat_load_sbyte with no immediate offset.
;  - %out.gep (i32 index 99999, byte offset 0x61a7c) is one full add on bonaire
;    and tonga, and a base add plus an immediate store offset on gfx900
;    (0x61000 + 2684) and gfx1030 (0x61800 + 636).
define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
; OPT-NEXT: entry:
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096
; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT: if:
; OPT-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-NEXT: br label [[ENDIF]]
; OPT: endif:
; OPT-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-NEXT: br label [[DONE:%.*]]
; OPT: done:
; OPT-NEXT: ret void
;
; GFX7-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v5, -1, 0
; GFX7-NEXT: v_mov_b32_e32 v4, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT: s_cbranch_execz .LBB4_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 0x1000, v2
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT: flat_load_sbyte v4, v[2:3]
; GFX7-NEXT: .LBB4_2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x61a7c, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v4
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX8-NEXT: v_mov_b32_e32 v4, 0
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT: s_cbranch_execz .LBB4_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x1000, v2
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: flat_load_sbyte v4, v[2:3]
; GFX8-NEXT: .LBB4_2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x61a7c, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v4
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB4_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0x1000, v2
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT: flat_load_sbyte v4, v[2:3]
; GFX9-NEXT: .LBB4_2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x61000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2684
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB4_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x1000, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: flat_load_sbyte v4, v[2:3]
; GFX10-NEXT: .LBB4_2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x61800, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:636
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i64 99999
; Byte GEP formed here, consumed only by the load in %if.
%in.gep = getelementptr i8, ptr %in, i64 4096
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %endif, label %if
if:
%load = load i8, ptr %in.gep
%cast = sext i8 %load to i32
br label %endif
endif:
; 0 is stored when the load was skipped.
%x = phi i32 [ %cast, %if ], [ 0, %entry ]
store i32 %x, ptr %out.gep
br label %done
done:
ret void
}
; Conditional sign-extending byte load where the GEP offset is the runtime i64
; argument %reg instead of a constant. The GEP is in %entry, the load is in
; %if, and the branch condition comes from llvm.amdgcn.mbcnt.lo(-1, 0).
;
; What the assertions below pin down:
;  - A single shared set of opt assertions covers all four -mcpu values and
;    shows the GEP left in %entry, still indexed by the REG value.
;  - llc output on every target adds the register pair v[4:5] to v[2:3] inside
;    the %if block and then issues flat_load_sbyte with no immediate offset.
;  - %out.gep (i32 index 1024, byte offset 0x1000) is an explicit 64-bit add
;    followed by a flat store with no immediate offset on all four targets.
define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
; OPT-LABEL: @test_sinkable_flat_reg_offset(
; OPT-NEXT: entry:
; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]]
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT: if:
; OPT-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-NEXT: br label [[ENDIF]]
; OPT: endif:
; OPT-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-NEXT: br label [[DONE:%.*]]
; OPT: done:
; OPT-NEXT: ret void
;
; GFX7-LABEL: test_sinkable_flat_reg_offset:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v7, -1, 0
; GFX7-NEXT: v_mov_b32_e32 v6, 0
; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT: s_cbranch_execz .LBB5_2
; GFX7-NEXT: ; %bb.1: ; %if
; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
; GFX7-NEXT: flat_load_sbyte v6, v[2:3]
; GFX7-NEXT: .LBB5_2: ; %endif
; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dword v[0:1], v6
; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sinkable_flat_reg_offset:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mbcnt_lo_u32_b32 v7, -1, 0
; GFX8-NEXT: v_mov_b32_e32 v6, 0
; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT: s_cbranch_execz .LBB5_2
; GFX8-NEXT: ; %bb.1: ; %if
; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
; GFX8-NEXT: flat_load_sbyte v6, v[2:3]
; GFX8-NEXT: .LBB5_2: ; %endif
; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x1000, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: flat_store_dword v[0:1], v6
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sinkable_flat_reg_offset:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mbcnt_lo_u32_b32 v7, -1, 0
; GFX9-NEXT: v_mov_b32_e32 v6, 0
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_cbranch_execz .LBB5_2
; GFX9-NEXT: ; %bb.1: ; %if
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GFX9-NEXT: flat_load_sbyte v6, v[2:3]
; GFX9-NEXT: .LBB5_2: ; %endif
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: flat_store_dword v[0:1], v6
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sinkable_flat_reg_offset:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v6, -1, 0
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10-NEXT: v_mov_b32_e32 v6, 0
; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT: s_cbranch_execz .LBB5_2
; GFX10-NEXT: ; %bb.1: ; %if
; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
; GFX10-NEXT: flat_load_sbyte v6, v[2:3]
; GFX10-NEXT: .LBB5_2: ; %endif
; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT: flat_store_dword v[0:1], v6
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
%out.gep = getelementptr i32, ptr %out, i32 1024
; Variable-offset byte GEP formed here, consumed only by the load in %if.
%in.gep = getelementptr i8, ptr %in, i64 %reg
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
%cmp0 = icmp eq i32 %tid, 0
br i1 %cmp0, label %endif, label %if
if:
%load = load i8, ptr %in.gep
%cast = sext i8 %load to i32
br label %endif
endif:
; 0 is stored when the load was skipped.
%x = phi i32 [ %cast, %if ], [ 0, %entry ]
store i32 %x, ptr %out.gep
br label %done
done:
ret void
}
; Intrinsic called by the mbcnt-based tests in this file to produce their
; branch condition.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
; Group #0 is attached to the mbcnt.lo declaration and to its call sites.
attributes #0 = { nounwind readnone }
; Group #1 is attached to the test functions that call mbcnt.lo.
attributes #1 = { nounwind }
; NOTE(review): nothing in the visible part of this file references group #2;
; confirm it is unused before removing it.
attributes #2 = { nounwind argmemonly }