This PR updates AMDGPULowerBufferFatPointers to use the InstSimplifyFolder when creating IR during buffer fat pointer lowering. This shouldn't cause any large functional changes and might improve the quality of the generated code.
127 lines
10 KiB
LLVM
127 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers -check-debugify < %s | FileCheck %s
|
|
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers,check-debugify < %s | FileCheck %s
|
|
|
|
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
|
|
target triple = "amdgcn--"
|
|
|
|
define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace(8) %aux) !dbg !5 {
|
|
; CHECK-LABEL: define float @debug_stash_pointer
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[IDX:%.*]], ptr addrspace(8) [[AUX:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
|
|
; CHECK-NEXT: [[BUF_PTR_VAR:%.*]] = alloca i160, align 32, addrspace(5), !dbg [[DBG21:![0-9]+]]
|
|
; CHECK-NEXT: #dbg_value(ptr addrspace(5) [[BUF_PTR_VAR]], [[META10:![0-9]+]], !DIExpression(), [[DBG21]])
|
|
; CHECK-NEXT: [[AUX_PTR_VAR:%.*]] = alloca i160, align 32, addrspace(5), !dbg [[DBG22:![0-9]+]]
|
|
; CHECK-NEXT: #dbg_value(ptr addrspace(5) [[AUX_PTR_VAR]], [[META12:![0-9]+]], !DIExpression(), [[DBG22]])
|
|
; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META13:![0-9]+]], !DIExpression(), [[META23:![0-9]+]])
|
|
; CHECK-NEXT: [[BUF_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF]] to i160, !dbg [[DBG24:![0-9]+]]
|
|
; CHECK-NEXT: [[BUF_PTR_INT:%.*]] = shl nuw i160 [[BUF_PTR_INT_RSRC]], 32, !dbg [[DBG24]]
|
|
; CHECK-NEXT: store i160 [[BUF_PTR_INT]], ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG24]]
|
|
; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META15:![0-9]+]], !DIExpression(), [[META25:![0-9]+]])
|
|
; CHECK-NEXT: [[AUX_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[AUX]] to i160, !dbg [[DBG26:![0-9]+]]
|
|
; CHECK-NEXT: [[AUX_PTR_INT:%.*]] = shl nuw i160 [[AUX_PTR_INT_RSRC]], 32, !dbg [[DBG26]]
|
|
; CHECK-NEXT: store i160 [[AUX_PTR_INT]], ptr addrspace(5) [[AUX_PTR_VAR]], align 32, !dbg [[DBG26]]
|
|
; CHECK-NEXT: [[BUF_PTR_2:%.*]] = load i160, ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG27:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[BUF_PTR_2]], 32, !dbg [[DBG27]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128, !dbg [[DBG27]]
|
|
; CHECK-NEXT: [[BUF_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP2]] to ptr addrspace(8), !dbg [[DBG27]]
|
|
; CHECK-NEXT: [[BUF_PTR_2_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_2]] to i32, !dbg [[DBG27]]
|
|
; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META16:![0-9]+]], !DIExpression(), [[DBG27]])
|
|
; CHECK-NEXT: [[BUF_PTR_3_IDX:%.*]] = mul i32 [[IDX]], 4, !dbg [[DBG28:![0-9]+]]
|
|
; CHECK-NEXT: [[BUF_PTR_3:%.*]] = add i32 [[BUF_PTR_2_PTR_OFF]], [[BUF_PTR_3_IDX]], !dbg [[DBG28]]
|
|
; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META17:![0-9]+]], !DIExpression(), [[DBG28]])
|
|
; CHECK-NEXT: [[BUF_PTR_3_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF_PTR_2_PTR_RSRC]] to i160, !dbg [[DBG29:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i160 [[BUF_PTR_3_INT_RSRC]], 32, !dbg [[DBG29]]
|
|
; CHECK-NEXT: [[BUF_PTR_3_INT_OFF:%.*]] = zext i32 [[BUF_PTR_3]] to i160, !dbg [[DBG29]]
|
|
; CHECK-NEXT: [[BUF_PTR_3_INT:%.*]] = or i160 [[TMP3]], [[BUF_PTR_3_INT_OFF]], !dbg [[DBG29]]
|
|
; CHECK-NEXT: store i160 [[BUF_PTR_3_INT]], ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG29]]
|
|
; CHECK-NEXT: [[BUF_PTR_4:%.*]] = load i160, ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG30:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = lshr i160 [[BUF_PTR_4]], 32, !dbg [[DBG30]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = trunc i160 [[TMP4]] to i128, !dbg [[DBG30]]
|
|
; CHECK-NEXT: [[BUF_PTR_4_PTR_RSRC:%.*]] = inttoptr i128 [[TMP5]] to ptr addrspace(8), !dbg [[DBG30]]
|
|
; CHECK-NEXT: [[BUF_PTR_4_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_4]] to i32, !dbg [[DBG30]]
|
|
; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META18:![0-9]+]], !DIExpression(), [[DBG30]])
|
|
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF_PTR_4_PTR_RSRC]], i32 [[BUF_PTR_4_PTR_OFF]], i32 0, i32 0), !dbg [[DBG31:![0-9]+]]
|
|
; CHECK-NEXT: #dbg_value(float [[RET]], [[META19:![0-9]+]], !DIExpression(), [[DBG31]])
|
|
; CHECK-NEXT: [[AUX_PTR_2:%.*]] = load i160, ptr addrspace(5) [[AUX_PTR_VAR]], align 32, !dbg [[DBG32:![0-9]+]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = lshr i160 [[AUX_PTR_2]], 32, !dbg [[DBG32]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = trunc i160 [[TMP6]] to i128, !dbg [[DBG32]]
|
|
; CHECK-NEXT: [[AUX_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP7]] to ptr addrspace(8), !dbg [[DBG32]]
|
|
; CHECK-NEXT: [[AUX_PTR_2_PTR_OFF:%.*]] = trunc i160 [[AUX_PTR_2]] to i32, !dbg [[DBG32]]
|
|
; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META20:![0-9]+]], !DIExpression(), [[DBG32]])
|
|
; CHECK-NEXT: [[BUF_PTR_4_LEGAL:%.*]] = bitcast i160 [[BUF_PTR_4]] to <5 x i32>, !dbg [[DBG33:![0-9]+]]
|
|
; CHECK-NEXT: [[BUF_PTR_4_SLICE_0:%.*]] = shufflevector <5 x i32> [[BUF_PTR_4_LEGAL]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>, !dbg [[DBG33]]
|
|
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[BUF_PTR_4_SLICE_0]], ptr addrspace(8) align 32 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]]
|
|
; CHECK-NEXT: [[AUX_PTR_2_PTR_PART_4:%.*]] = add nuw i32 [[AUX_PTR_2_PTR_OFF]], 16, !dbg [[DBG33]]
|
|
; CHECK-NEXT: [[BUF_PTR_4_SLICE_4:%.*]] = extractelement <5 x i32> [[BUF_PTR_4_LEGAL]], i64 4, !dbg [[DBG33]]
|
|
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[BUF_PTR_4_SLICE_4]], ptr addrspace(8) align 16 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_PART_4]], i32 0, i32 0), !dbg [[DBG33]]
|
|
; CHECK-NEXT: ret float [[RET]], !dbg [[DBG34:![0-9]+]]
|
|
;
|
|
%buf.ptr.var = alloca ptr addrspace(7), align 32, addrspace(5), !dbg !20
|
|
call void @llvm.dbg.value(metadata ptr addrspace(5) %buf.ptr.var, metadata !9, metadata !DIExpression()), !dbg !20
|
|
%aux.ptr.var = alloca ptr addrspace(7), align 32, addrspace(5), !dbg !21
|
|
call void @llvm.dbg.value(metadata ptr addrspace(5) %aux.ptr.var, metadata !11, metadata !DIExpression()), !dbg !21
|
|
%buf.ptr = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7), !dbg !22
|
|
call void @llvm.dbg.value(metadata ptr addrspace(7) %buf.ptr, metadata !12, metadata !DIExpression()), !dbg !22
|
|
store ptr addrspace(7) %buf.ptr, ptr addrspace(5) %buf.ptr.var, align 32, !dbg !23
|
|
%aux.ptr = addrspacecast ptr addrspace(8) %aux to ptr addrspace(7), !dbg !24
|
|
call void @llvm.dbg.value(metadata ptr addrspace(7) %aux.ptr, metadata !14, metadata !DIExpression()), !dbg !24
|
|
store ptr addrspace(7) %aux.ptr, ptr addrspace(5) %aux.ptr.var, align 32, !dbg !25
|
|
%buf.ptr.2 = load ptr addrspace(7), ptr addrspace(5) %buf.ptr.var, align 32, !dbg !26
|
|
call void @llvm.dbg.value(metadata ptr addrspace(7) %buf.ptr.2, metadata !15, metadata !DIExpression()), !dbg !26
|
|
%buf.ptr.3 = getelementptr float, ptr addrspace(7) %buf.ptr.2, i32 %idx, !dbg !27
|
|
call void @llvm.dbg.value(metadata ptr addrspace(7) %buf.ptr.3, metadata !16, metadata !DIExpression()), !dbg !27
|
|
store ptr addrspace(7) %buf.ptr.3, ptr addrspace(5) %buf.ptr.var, align 32, !dbg !28
|
|
%buf.ptr.4 = load ptr addrspace(7), ptr addrspace(5) %buf.ptr.var, align 32, !dbg !29
|
|
call void @llvm.dbg.value(metadata ptr addrspace(7) %buf.ptr.4, metadata !17, metadata !DIExpression()), !dbg !29
|
|
%ret = load float, ptr addrspace(7) %buf.ptr.4, align 4, !dbg !30
|
|
call void @llvm.dbg.value(metadata float %ret, metadata !18, metadata !DIExpression()), !dbg !30
|
|
%aux.ptr.2 = load ptr addrspace(7), ptr addrspace(5) %aux.ptr.var, align 32, !dbg !31
|
|
call void @llvm.dbg.value(metadata ptr addrspace(7) %aux.ptr.2, metadata !19, metadata !DIExpression()), !dbg !31
|
|
store ptr addrspace(7) %buf.ptr.4, ptr addrspace(7) %aux.ptr.2, align 32, !dbg !32
|
|
ret float %ret, !dbg !33
|
|
}
|
|
|
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
|
declare void @llvm.dbg.value(metadata, metadata, metadata) #0
|
|
|
|
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
|
|
!llvm.dbg.cu = !{!0}
|
|
!llvm.debugify = !{!2, !3}
|
|
!llvm.module.flags = !{!4}
|
|
|
|
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
|
|
!1 = !DIFile(filename: "<stdin>", directory: "/")
|
|
!2 = !{i32 14}
|
|
!3 = !{i32 9}
|
|
!4 = !{i32 2, !"Debug Info Version", i32 3}
|
|
!5 = distinct !DISubprogram(name: "debug_stash_pointer", linkageName: "debug_stash_pointer", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
|
|
!6 = !DISubroutineType(types: !7)
|
|
!7 = !{}
|
|
!8 = !{!9, !11, !12, !14, !15, !16, !17, !18, !19}
|
|
!9 = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !10)
|
|
!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
|
|
!11 = !DILocalVariable(name: "2", scope: !5, file: !1, line: 2, type: !10)
|
|
!12 = !DILocalVariable(name: "3", scope: !5, file: !1, line: 3, type: !13)
|
|
!13 = !DIBasicType(name: "ty256", size: 256, encoding: DW_ATE_unsigned)
|
|
!14 = !DILocalVariable(name: "4", scope: !5, file: !1, line: 5, type: !13)
|
|
!15 = !DILocalVariable(name: "5", scope: !5, file: !1, line: 7, type: !13)
|
|
!16 = !DILocalVariable(name: "6", scope: !5, file: !1, line: 8, type: !13)
|
|
!17 = !DILocalVariable(name: "7", scope: !5, file: !1, line: 10, type: !13)
|
|
!18 = !DILocalVariable(name: "8", scope: !5, file: !1, line: 11, type: !10)
|
|
!19 = !DILocalVariable(name: "9", scope: !5, file: !1, line: 12, type: !13)
|
|
!20 = !DILocation(line: 1, column: 1, scope: !5)
|
|
!21 = !DILocation(line: 2, column: 1, scope: !5)
|
|
!22 = !DILocation(line: 3, column: 1, scope: !5)
|
|
!23 = !DILocation(line: 4, column: 1, scope: !5)
|
|
!24 = !DILocation(line: 5, column: 1, scope: !5)
|
|
!25 = !DILocation(line: 6, column: 1, scope: !5)
|
|
!26 = !DILocation(line: 7, column: 1, scope: !5)
|
|
!27 = !DILocation(line: 8, column: 1, scope: !5)
|
|
!28 = !DILocation(line: 9, column: 1, scope: !5)
|
|
!29 = !DILocation(line: 10, column: 1, scope: !5)
|
|
!30 = !DILocation(line: 11, column: 1, scope: !5)
|
|
!31 = !DILocation(line: 12, column: 1, scope: !5)
|
|
!32 = !DILocation(line: 13, column: 1, scope: !5)
|
|
!33 = !DILocation(line: 14, column: 1, scope: !5)
|