This was stored in LiveIntervals, but was not actually used for anything related to LiveIntervals. It was only used in one check of whether a load instruction is rematerializable. I also don't think this was entirely correct, since it implicitly assumed that constant loads are also dereferenceable. Remove this and rely only on the invariant+dereferenceable flags in the memory operand. Set the flag based on the AA query upfront. This should have the same net benefit, but has the possible disadvantage of making this AA query non-lazy. Preserve the behavior of assuming that pointsToConstantMemory implies dereferenceable for now, but maybe this should be changed.
126 lines
5.2 KiB
YAML
126 lines
5.2 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=x86_64-- -run-pass machinelicm -mcpu=skx -verify-machineinstrs -o - %s | FileCheck %s
|
|
--- |
|
|
@x = dso_local global i32 0, align 4
|
|
@z = dso_local local_unnamed_addr global [1024 x i32] zeroinitializer, align 16
|
|
@y = dso_local local_unnamed_addr constant i32* null, align 8
|
|
|
|
; Function Attrs: nofree norecurse nosync nounwind uwtable writeonly mustprogress
|
|
; Loads the pointer stored in the constant global @y once, compares it with the
; address of @x, and stores the zero-extended comparison result into @z on
; every iteration of the loop below.
define dso_local void @_Z3foov() local_unnamed_addr #0 {
|
|
%1 = load i32*, i32** @y, align 8, !tbaa !3
|
|
%2 = icmp eq i32* %1, @x
|
|
%3 = zext i1 %2 to i32
|
|
br label %5
|
|
4: ; preds = %5
|
|
ret void
|
|
; Loop: %lsr.iv counts from -4096 up to 0 in steps of 4. %scevgep adds 1024 i32
; elements (4096 bytes) back onto @z + %lsr.iv, so the store starts at the
; beginning of @z and advances by one i32 per iteration.
5: ; preds = %5, %0
|
|
%lsr.iv = phi i64 [ %lsr.iv.next, %5 ], [ -4096, %0 ]
|
|
%uglygep = getelementptr i8, i8* bitcast ([1024 x i32]* @z to i8*), i64 %lsr.iv
|
|
%uglygep2 = bitcast i8* %uglygep to i32*
|
|
%scevgep = getelementptr i32, i32* %uglygep2, i64 1024
|
|
store i32 %3, i32* %scevgep, align 4, !tbaa !7
|
|
%lsr.iv.next = add nsw i64 %lsr.iv, 4
|
|
%6 = icmp eq i64 %lsr.iv.next, 0
|
|
br i1 %6, label %4, label %5, !llvm.loop !9
|
|
}
|
|
|
|
attributes #0 = { nofree norecurse nosync nounwind uwtable writeonly mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" }
|
|
|
|
!llvm.module.flags = !{!0, !1}
|
|
!llvm.ident = !{!2}
|
|
!0 = !{i32 1, !"wchar_size", i32 4}
|
|
!1 = !{i32 7, !"uwtable", i32 1}
|
|
!2 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git c42dd5dbb015afaef99cf876195c474c63c2393e)"}
|
|
!3 = !{!4, !4, i64 0}
|
|
!4 = !{!"any pointer", !5, i64 0}
|
|
!5 = !{!"omnipotent char", !6, i64 0}
|
|
!6 = !{!"Simple C++ TBAA"}
|
|
!7 = !{!8, !8, i64 0}
|
|
!8 = !{!"int", !5, i64 0}
|
|
!9 = distinct !{!9, !10, !11}
|
|
!10 = !{!"llvm.loop.mustprogress"}
|
|
!11 = !{!"llvm.loop.unroll.disable"}
|
|
|
|
...
|
|
---
|
|
name: _Z3foov
|
|
alignment: 16
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
failedISel: false
|
|
tracksRegLiveness: true
|
|
hasWinCFI: false
|
|
registers: []
|
|
liveins: []
|
|
frameInfo:
|
|
isFrameAddressTaken: false
|
|
isReturnAddressTaken: false
|
|
hasStackMap: false
|
|
hasPatchPoint: false
|
|
stackSize: 0
|
|
offsetAdjustment: 0
|
|
maxAlignment: 1
|
|
adjustsStack: false
|
|
hasCalls: false
|
|
stackProtector: ''
|
|
maxCallFrameSize: 4294967295
|
|
cvBytesOfCalleeSavedRegisters: 0
|
|
hasOpaqueSPAdjustment: false
|
|
hasVAStart: false
|
|
hasMustTailInVarArgFunc: false
|
|
hasTailCall: false
|
|
localFrameSize: 0
|
|
savePoint: ''
|
|
restorePoint: ''
|
|
fixedStack: []
|
|
stack: []
|
|
callSites: []
|
|
debugValueSubstitutions: []
|
|
constants: []
|
|
machineFunctionInfo: {}
|
|
body: |
|
|
; CHECK-LABEL: name: _Z3foov
|
|
; CHECK: bb.0 (%ir-block.0):
|
|
; CHECK-NEXT: successors: %bb.2(0x80000000)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $eax = MOV32r0 implicit-def dead $eflags
|
|
; CHECK-NEXT: renamable $rcx = MOV64ri32 -4096
|
|
; CHECK-NEXT: [[MOV64ri32_:%[0-9]+]]:gr64 = MOV64ri32 -4096
|
|
; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm $rip, 1, $noreg, @y, $noreg :: (dereferenceable invariant load (s64) from @y, !tbaa !3)
|
|
; CHECK-NEXT: JMP_1 %bb.2
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.1 (%ir-block.4):
|
|
; CHECK-NEXT: RET 0
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.2 (%ir-block.5):
|
|
; CHECK-NEXT: successors: %bb.1(0x04000000), %bb.2(0x7c000000)
|
|
; CHECK-NEXT: liveins: $eax, $rcx
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: CMP64ri32 [[MOV64rm]], @x, implicit-def $eflags
|
|
; CHECK-NEXT: renamable $al = SETCCr 4, implicit killed $eflags, implicit killed $eax, implicit-def $eax
|
|
; CHECK-NEXT: MOV32mr renamable $rcx, 1, $noreg, @z + 4096, $noreg, renamable $eax :: (store (s32) into %ir.scevgep, !tbaa !7)
|
|
; CHECK-NEXT: renamable $rcx = ADD64ri8 killed renamable $rcx, 4, implicit-def $eflags
|
|
; CHECK-NEXT: JCC_1 %bb.1, 4, implicit killed $eflags
|
|
; CHECK-NEXT: JMP_1 %bb.2
|
|
; Input MIR for the machinelicm run. bb.0 is the block that jumps into the
; loop, bb.1 is the exit, and bb.2 is a single-block loop (it lists itself as
; a successor).
bb.0 (%ir-block.0):
|
|
successors: %bb.2(0x80000000)
|
|
renamable $eax = MOV32r0 implicit-def dead $eflags
|
|
renamable $rcx = MOV64ri32 -4096
|
|
JMP_1 %bb.2
|
|
bb.1 (%ir-block.4):
|
|
RET 0
|
|
bb.2 (%ir-block.5):
|
|
successors: %bb.1(0x04000000), %bb.2(0x7c000000)
|
|
liveins: $eax, $rcx
|
|
; Constant materialization placed inside the loop; the expected output above
; has it in bb.0 instead.
%2:gr64 = MOV64ri32 -4096
|
|
; Compare with the load of @y folded in. The memory operand carries both the
; dereferenceable and invariant flags. The expected output above splits this
; into a MOV64rm of @y in bb.0 and a register-form CMP64ri32 left in the loop.
CMP64mi32 $rip, 1, $noreg, @y, $noreg, @x, implicit-def $eflags :: (dereferenceable invariant load (s64) from @y, !tbaa !3)
|
|
renamable $al = SETCCr 4, implicit killed $eflags, implicit killed $eax, implicit-def $eax
|
|
MOV32mr renamable $rcx, 1, $noreg, @z + 4096, $noreg, renamable $eax :: (store (s32) into %ir.scevgep, !tbaa !7)
|
|
renamable $rcx = ADD64ri8 killed renamable $rcx, 4, implicit-def $eflags
|
|
JCC_1 %bb.1, 4, implicit killed $eflags
|
|
JMP_1 %bb.2
|
|
|
|
...
|