Files
clang-p2996/llvm/test/CodeGen/Hexagon/machine-sink-float-usr.mir
Sumanth Gundapaneni 822448635e [Hexagon] Fix MachineSink not to hoist FP instructions that update USR.
Ideally we should make USR as Def for these floating point instructions.
However, it violates some assembler MCChecker rules. This patch fixes
the issue by marking these FP instructions as non-sinkable.
2022-01-04 15:55:22 -08:00

326 lines
14 KiB
YAML

# RUN: llc -march=hexagon -run-pass machine-sink -o - %s | FileCheck %s
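# Test that MachineSink does not sink F2_conv_w2sf: since the instruction
# updates USR, it must stay between the feclearexcept and fetestexcept calls
# in the loop body of main.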
# CHECK: name:{{.*}} main
# CHECK: J2_call @feclearexcept
# CHECK: F2_conv_w2sf
# CHECK: J2_call @fetestexcept
--- |
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon"
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
; Converts the signed 32-bit integer %a to float and returns the raw bits of
; that float as an i32.
; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define dso_local i32 @syst_int32_to_float32(i32 %a) local_unnamed_addr #0 {
entry:
%conv = sitofp i32 %a to float
; Reinterpret the float's bits as an integer; no numeric conversion happens here.
%0 = bitcast float %conv to i32
ret i32 %0
}
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
; Function Attrs: argmemonly mustprogress nofree nosync nounwind willreturn
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
; main: initializes the volatile locals a, b and c, runs for.body while the
; loop counter is below the current value of b, and returns a value computed
; from a, b and c.
; Function Attrs: nounwind
define dso_local i32 @main() local_unnamed_addr #2 {
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
%a.0.a.0.a.0.a.0..sroa_cast = bitcast i32* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %a.0.a.0.a.0.a.0..sroa_cast)
store volatile i32 -16777235, i32* %a, align 4, !tbaa !3
%b.0.b.0.b.0.b.0..sroa_cast = bitcast i32* %b to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %b.0.b.0.b.0.b.0..sroa_cast)
store volatile i32 34, i32* %b, align 4, !tbaa !3
%c.0.c.0.c.0.c.0..sroa_cast = bitcast i32* %c to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %c.0.c.0.c.0.c.0..sroa_cast)
store volatile i32 34, i32* %c, align 4, !tbaa !3
; Skip the loop entirely when the value read back from b is not positive.
%b.0.b.0.b.0.b.0.29 = load volatile i32, i32* %b, align 4, !tbaa !3
%cmp30 = icmp sgt i32 %b.0.b.0.b.0.b.0.29, 0
br i1 %cmp30, label %for.body, label %if.end
; Loop exit: convert the last value loaded from a to float and branch on
; whether the float's bit pattern is unsigned-greater than 100.
for.cond.for.cond.cleanup_crit_edge: ; preds = %for.body
%conv.i.le = sitofp i32 %a.0.a.0.a.0.a.0. to float
%0 = bitcast float %conv.i.le to i32
%phi.cmp = icmp ugt i32 %0, 100
br i1 %phi.cmp, label %if.then, label %if.end
; Loop body: increment c, call feclearexcept(31), load a, call
; fetestexcept(31) and print its result with printf.
for.body: ; preds = %entry, %for.body
%i.031 = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
%c.0.c.0.c.0.c.0. = load volatile i32, i32* %c, align 4, !tbaa !3
%inc = add nsw i32 %c.0.c.0.c.0.c.0., 1
store volatile i32 %inc, i32* %c, align 4, !tbaa !3
%call = tail call i32 @feclearexcept(i32 31) #5
%a.0.a.0.a.0.a.0. = load volatile i32, i32* %a, align 4, !tbaa !3
%call2 = tail call i32 @fetestexcept(i32 31) #5
%call3 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %call2) #5
%inc4 = add nuw nsw i32 %i.031, 1
; b is volatile, so the loop bound is reloaded on every iteration.
%b.0.b.0.b.0.b.0. = load volatile i32, i32* %b, align 4, !tbaa !3
%cmp = icmp slt i32 %inc4, %b.0.b.0.b.0.b.0.
br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge, !llvm.loop !7
; Result on this path: a + b + c.
if.then: ; preds = %for.cond.for.cond.cleanup_crit_edge
%a.0.a.0.a.0.a.0.23 = load volatile i32, i32* %a, align 4, !tbaa !3
%b.0.b.0.b.0.b.0.20 = load volatile i32, i32* %b, align 4, !tbaa !3
%add = add nsw i32 %b.0.b.0.b.0.b.0.20, %a.0.a.0.a.0.a.0.23
%c.0.c.0.c.0.c.0.17 = load volatile i32, i32* %c, align 4, !tbaa !3
%add7 = add nsw i32 %add, %c.0.c.0.c.0.c.0.17
br label %cleanup
; Result on this path: a + (b * -6) + (c * 3).
if.end: ; preds = %entry, %for.cond.for.cond.cleanup_crit_edge
%a.0.a.0.a.0.a.0.24 = load volatile i32, i32* %a, align 4, !tbaa !3
%b.0.b.0.b.0.b.0.21 = load volatile i32, i32* %b, align 4, !tbaa !3
%mul.neg = mul i32 %b.0.b.0.b.0.b.0.21, -6
%sub = add i32 %mul.neg, %a.0.a.0.a.0.a.0.24
%c.0.c.0.c.0.c.0.18 = load volatile i32, i32* %c, align 4, !tbaa !3
%mul8 = mul nsw i32 %c.0.c.0.c.0.c.0.18, 3
%add9 = add nsw i32 %sub, %mul8
br label %cleanup
; Merge the two results, end the lifetimes of c, b and a, and return.
cleanup: ; preds = %if.end, %if.then
%retval.0 = phi i32 [ %add7, %if.then ], [ %add9, %if.end ]
%1 = bitcast i32* %c to i8*
%2 = bitcast i32* %b to i8*
%3 = bitcast i32* %a to i8*
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1)
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2)
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3)
ret i32 %retval.0
}
declare dso_local i32 @feclearexcept(i32) local_unnamed_addr #3
declare dso_local i32 @fetestexcept(i32) local_unnamed_addr #3
; Function Attrs: nofree nounwind
declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #4
attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
attributes #3 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
attributes #4 = { nofree nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv68" "target-features"="+v68,-long-calls" }
attributes #5 = { nounwind }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 2}
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.mustprogress"}
...
---
# Machine function for @syst_int32_to_float32: a single basic block that
# applies F2_conv_w2sf to the incoming $r0 and returns the result in $r0.
name: syst_int32_to_float32
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: intregs, preferred-register: '' }
- { id: 1, class: intregs, preferred-register: '' }
liveins:
- { reg: '$r0', virtual-reg: '%0' }
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 1
adjustsStack: false
hasCalls: false
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
liveins: $r0
%0:intregs = COPY $r0
; The conversion carries an implicit $usr operand.
%1:intregs = F2_conv_w2sf %0, implicit $usr
$r0 = COPY %1
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...
---
# Machine function for @main. bb.2.for.body holds the F2_conv_w2sf that the
# FileCheck lines at the top of this file expect to remain between the
# feclearexcept and fetestexcept calls after the machine-sink pass.
name: main
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
registers:
- { id: 0, class: intregs, preferred-register: '' }
- { id: 1, class: intregs, preferred-register: '' }
- { id: 2, class: intregs, preferred-register: '' }
- { id: 3, class: intregs, preferred-register: '' }
- { id: 4, class: intregs, preferred-register: '' }
- { id: 5, class: intregs, preferred-register: '' }
- { id: 6, class: intregs, preferred-register: '' }
- { id: 7, class: intregs, preferred-register: '' }
- { id: 8, class: predregs, preferred-register: '' }
- { id: 9, class: intregs, preferred-register: '' }
- { id: 10, class: intregs, preferred-register: '' }
- { id: 11, class: intregs, preferred-register: '' }
- { id: 12, class: intregs, preferred-register: '' }
- { id: 13, class: intregs, preferred-register: '' }
- { id: 14, class: intregs, preferred-register: '' }
- { id: 15, class: intregs, preferred-register: '' }
- { id: 16, class: predregs, preferred-register: '' }
- { id: 17, class: intregs, preferred-register: '' }
- { id: 18, class: predregs, preferred-register: '' }
- { id: 19, class: intregs, preferred-register: '' }
- { id: 20, class: intregs, preferred-register: '' }
- { id: 21, class: intregs, preferred-register: '' }
- { id: 22, class: intregs, preferred-register: '' }
- { id: 23, class: intregs, preferred-register: '' }
- { id: 24, class: intregs, preferred-register: '' }
- { id: 25, class: intregs, preferred-register: '' }
- { id: 26, class: intregs, preferred-register: '' }
- { id: 27, class: intregs, preferred-register: '' }
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: true
stackProtector: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
# Stack slots 0, 1 and 2 back the IR allocas a, b and c.
stack:
- { id: 0, name: a, type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 1, name: b, type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
- { id: 2, name: c, type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0.entry:
successors: %bb.6(0x50000000), %bb.4(0x30000000)
; Initialize a, b and c, then branch to %bb.4 when the predicate computed
; from the reloaded b is false.
S4_storeiri_io %stack.0.a, 0, -16777235 :: (volatile store (s32) into %ir.a, !tbaa !3)
S4_storeiri_io %stack.1.b, 0, 34 :: (volatile store (s32) into %ir.b, !tbaa !3)
S4_storeiri_io %stack.2.c, 0, 34 :: (volatile store (s32) into %ir.c, !tbaa !3)
%7:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
%8:predregs = C2_cmpgti %7, 0
%6:intregs = A2_tfrsi 0
J2_jumpf %8, %bb.4, implicit-def $pc
bb.6:
successors: %bb.2(0x80000000)
; Values defined once here and reused on every iteration of %bb.2: the
; constant 31 passed to both fe* calls and the address of @.str for printf.
%9:intregs = A2_tfrsi 31
%13:intregs = A2_tfrsi @.str
J2_jump %bb.2, implicit-def $pc
bb.1.for.cond.for.cond.cleanup_crit_edge:
; NOTE(review): only %bb.4 is listed as a successor here, and no block in this
; function lists %bb.3 as a successor - confirm that this is intended.
successors: %bb.4(0x40000000)
J2_jump %bb.4, implicit-def dead $pc
bb.2.for.body:
successors: %bb.2(0x7c000000), %bb.1(0x04000000)
%0:intregs = PHI %6, %bb.6, %2, %bb.2
L4_iadd_memopw_io %stack.2.c, 0, 1 :: (volatile store (s32) into %ir.c, !tbaa !3), (volatile dereferenceable load (s32) from %ir.c, !tbaa !3)
ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
$r0 = COPY %9
J2_call @feclearexcept, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0
ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
%1:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3)
ADJCALLSTACKDOWN 0, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
; Instruction under test: it carries an implicit $usr operand and sits between
; the feclearexcept and fetestexcept calls. Its result %17 is only used by the
; C2_cmpgtui in bb.3.if.then, outside this block.
%17:intregs = F2_conv_w2sf %1, implicit $usr
$r0 = COPY %9
J2_call @fetestexcept, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0
ADJCALLSTACKUP 0, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
; The fetestexcept result is stored through the $r29 copy before the printf call.
%11:intregs = COPY $r0
%12:intregs = COPY $r29
S2_storeri_io %12, 0, %11 :: (store (s32) into stack)
ADJCALLSTACKDOWN 4, 0, implicit-def $r29, implicit-def dead $r30, implicit $r31, implicit $r30, implicit $r29
$r0 = COPY %13
J2_call @printf, hexagoncsr, implicit-def dead $pc, implicit-def dead $r31, implicit $r29, implicit $r0, implicit-def $r29, implicit-def $r0
ADJCALLSTACKUP 4, 0, implicit-def dead $r29, implicit-def dead $r30, implicit-def dead $r31, implicit $r29
; Advance the loop counter and loop back while the predicate computed from
; the reloaded b and the new counter is true.
%2:intregs = nuw nsw A2_addi %0, 1
%15:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
%16:predregs = C2_cmpgt %15, %2
J2_jumpt %16, %bb.2, implicit-def dead $pc
J2_jump %bb.1, implicit-def dead $pc
bb.3.if.then:
successors: %bb.5(0x80000000)
; The only use of %17 (the F2_conv_w2sf result from %bb.2) is this compare.
%18:predregs = C2_cmpgtui %17, 100
%24:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3)
%25:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
%26:intregs = L2_loadri_io %stack.2.c, 0 :: (volatile dereferenceable load (s32) from %ir.c, !tbaa !3)
%3:intregs = nsw M2_acci %26, %25, %24
; Both branches below target %bb.5.
J2_jumpf %18, %bb.5, implicit-def dead $pc
J2_jump %bb.5, implicit-def dead $pc
bb.4.if.end:
successors: %bb.5(0x80000000)
; No terminator in this block: control falls through to %bb.5.
%19:intregs = L2_loadri_io %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a, !tbaa !3)
%20:intregs = L2_loadri_io %stack.1.b, 0 :: (volatile dereferenceable load (s32) from %ir.b, !tbaa !3)
%27:intregs = M2_macsin %19, %20, 6
%23:intregs = L2_loadri_io %stack.2.c, 0 :: (volatile dereferenceable load (s32) from %ir.c, !tbaa !3)
%4:intregs = nsw M2_macsip %27, %23, 3
bb.5.cleanup:
; Select the result from whichever block was taken and return it in $r0.
%5:intregs = PHI %4, %bb.4, %3, %bb.3
$r0 = COPY %5
PS_jmpret $r31, implicit-def dead $pc, implicit $r0
...