This patch fixes a bug caused by treating variable-length and immediate-length memory operations (such as memcpy, memset, ...) differently. The variable-length case needs to have the length minus 1 passed because of the use of EXRL target instructions. However, the DAGCombiner can convert a register length argument into a constant one, and whenever that happened one byte too few would end up being processed. The patch is also a refactoring that reduces the number of opcodes and variants involved: for any opcode (variable or constant length), only the length minus one is passed on to the ISD node, and the rest of the logic is now handled during isel pseudo expansion. Review: Ulrich Weigand. Differential Revision: https://reviews.llvm.org/D111729
255 lines
9.9 KiB
LLVM
255 lines
9.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test memset 0 with variable length
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; fun0 - zeroing memset whose length is an i64 register value.
; Expected code: decrement the length (aghi -1), return immediately if the
; result is -1 (Len was 0), clear whole 256-byte blocks in an XC loop that
; runs (Len - 1) >> 8 times, then handle the remainder with EXRL on the
; out-of-line XC at .Ltmp0.
define void @fun0(i8* %Addr, i64 %Len) {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r3, -1
; CHECK-NEXT: cgibe %r3, -1, 0(%r14)
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: srlg %r0, %r3, 8
; CHECK-NEXT: cgije %r0, 0, .LBB0_3
; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: brctg %r0, .LBB0_2
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: exrl %r3, .Ltmp0
; CHECK-NEXT: br %r14
  tail call void @llvm.memset.p0i8.i64(i8* %Addr, i8 0, i64 %Len, i1 false)
  ret void
}

; fun1 - zeroing memset whose length is an i32 register value.
; Expected code: the length is first zero-extended to 64 bits (llgfr), after
; which the sequence matches the i64 case: decrement, early return when the
; result is -1, 256-byte XC loop, and EXRL on the XC at .Ltmp0 for the
; remainder.
define void @fun1(i8* %Addr, i32 %Len) {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: llgfr %r1, %r3
; CHECK-NEXT: aghi %r1, -1
; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: cgije %r0, 0, .LBB1_3
; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: brctg %r0, .LBB1_2
; CHECK-NEXT: .LBB1_3:
; CHECK-NEXT: exrl %r1, .Ltmp0
; CHECK-NEXT: br %r14
  tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
  ret void
}

; Test that identical target instructions get reused.
; Three identical variable-length memsets are issued back to back. The first
; two expansions address the destination through %r3 and both execute the same
; EXRL target (.Ltmp1); the last one addresses it through %r2 and executes
; .Ltmp0, the target also expected in fun0 and fun1.
define void @fun2(i8* %Addr, i32 %Len) {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: llgfr %r1, %r3
; CHECK-NEXT: aghi %r1, -1
; CHECK-NEXT: cgije %r1, -1, .LBB2_5
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: cgije %r0, 0, .LBB2_4
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
; CHECK-NEXT: la %r3, 256(%r3)
; CHECK-NEXT: brctg %r0, .LBB2_3
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: exrl %r1, .Ltmp1
; CHECK-NEXT: .LBB2_5:
; CHECK-NEXT: cgije %r1, -1, .LBB2_10
; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: cgije %r0, 0, .LBB2_9
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: .LBB2_8: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
; CHECK-NEXT: la %r3, 256(%r3)
; CHECK-NEXT: brctg %r0, .LBB2_8
; CHECK-NEXT: .LBB2_9:
; CHECK-NEXT: exrl %r1, .Ltmp1
; CHECK-NEXT: .LBB2_10:
; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
; CHECK-NEXT: .LBB2_11:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: cgije %r0, 0, .LBB2_13
; CHECK-NEXT: .LBB2_12: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: brctg %r0, .LBB2_12
; CHECK-NEXT: .LBB2_13:
; CHECK-NEXT: exrl %r1, .Ltmp0
; CHECK-NEXT: br %r14
  tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
  tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
  tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
  ret void
}

; Test that a memset to nullptr compiles.
; The destination is the constant null pointer, which the expected code
; materializes with lghi %r1, 0 before the usual 256-byte XC loop; the
; remainder is executed through EXRL on the %r1-based XC at .Ltmp2.
define void @fun3(i64 %Len) {
; CHECK-LABEL: fun3:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r2, -1
; CHECK-NEXT: cgibe %r2, -1, 0(%r14)
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: srlg %r0, %r2, 8
; CHECK-NEXT: lghi %r1, 0
; CHECK-NEXT: cgije %r0, 0, .LBB3_3
; CHECK-NEXT: .LBB3_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r1), 0(%r1)
; CHECK-NEXT: la %r1, 256(%r1)
; CHECK-NEXT: brctg %r0, .LBB3_2
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: exrl %r2, .Ltmp2
; CHECK-NEXT: br %r14
  call void @llvm.memset.p0i8.i64(i8* null, i8 0, i64 %Len, i1 false)
  ret void
}

; Test that a memset with a length argument that DAGCombiner will convert
; into a constant gets the correct number of bytes set.
; The length expression below is (Data + 1024 + 1) - (Data + 35 + 1) = 989
; bytes, starting at offset 35 of Data. The expected code clears exactly that
; many bytes with four XCs (256 + 256 + 256 + 221). The trailing store of a
; pointer built from the same expression appears as the mvghi of 989.
@Data = external hidden constant [1024 x i8], align 2
define void @fun4() {
; CHECK-LABEL: fun4:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(256,%r1), 35(%r1)
; CHECK-NEXT: xc 291(256,%r1), 291(%r1)
; CHECK-NEXT: xc 547(256,%r1), 547(%r1)
; CHECK-NEXT: xc 803(221,%r1), 803(%r1)
; CHECK-NEXT: mvghi 0(%r1), 989
; CHECK-NEXT: br %r14
  call void @llvm.memset.p0i8.i64(
    i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
    i8 0,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 0) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 0, i64 35) to i64), i64 1)),
    i1 false)
  %i11 = getelementptr i8, i8* null,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 0) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 0, i64 35) to i64), i64 1))
  store i8* %i11, i8** undef, align 8
  ret void
}

; The same, with a resulting constant length of 0.
; Both operands of the length subtraction are the same address plus one, so
; the memset length is 0: no XC is expected at all, only the store of the
; resulting pointer value (mvghi of 0).
define void @fun5() {
; CHECK-LABEL: fun5:
; CHECK: # %bb.0:
; CHECK-NEXT: mvghi 0(%r1), 0
; CHECK-NEXT: br %r14
  call void @llvm.memset.p0i8.i64(
    i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
    i8 0,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
    i1 false)
  %i11 = getelementptr i8, i8* null,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
  store i8* %i11, i8** undef, align 8
  ret void
}

; The same, with a resulting constant length of 1.
; The two addresses in the length subtraction differ by one byte (index 36
; versus 35), so a single one-byte XC at offset 35 is expected: the smallest
; non-empty case, which must not lose its only byte.
define void @fun6() {
; CHECK-LABEL: fun6:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(1,%r1), 35(%r1)
; CHECK-NEXT: mvghi 0(%r1), 1
; CHECK-NEXT: br %r14
  call void @llvm.memset.p0i8.i64(
    i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
    i8 0,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 36) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
    i1 false)
  %i11 = getelementptr i8, i8* null,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 36) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
  store i8* %i11, i8** undef, align 8
  ret void
}

; The same, with a resulting constant length of 256.
; The two addresses in the length subtraction differ by 291 - 35 = 256 bytes,
; the largest length a single XC in the expected output covers: exactly one
; full 256-byte XC and nothing else.
define void @fun7() {
; CHECK-LABEL: fun7:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(256,%r1), 35(%r1)
; CHECK-NEXT: mvghi 0(%r1), 256
; CHECK-NEXT: br %r14
  call void @llvm.memset.p0i8.i64(
    i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
    i8 0,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 291) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
    i1 false)
  %i11 = getelementptr i8, i8* null,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 291) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
  store i8* %i11, i8** undef, align 8
  ret void
}

; The same, with a resulting constant length of 257.
; The two addresses in the length subtraction differ by 292 - 35 = 257 bytes,
; one more than a single 256-byte XC: a full 256-byte XC at offset 35 followed
; by a one-byte XC at offset 291 is expected.
define void @fun8() {
; CHECK-LABEL: fun8:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(256,%r1), 35(%r1)
; CHECK-NEXT: xc 291(1,%r1), 291(%r1)
; CHECK-NEXT: mvghi 0(%r1), 257
; CHECK-NEXT: br %r14
  call void @llvm.memset.p0i8.i64(
    i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
    i8 0,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 292) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
    i1 false)
  %i11 = getelementptr i8, i8* null,
    i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 292) to i64), i64 1),
      i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
      [1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
  store i8* %i11, i8** undef, align 8
  ret void
}

; Out-of-line EXRL target instructions, matched after the checks of the last
; function. Each is an XC written with a length of one byte; presumably EXRL
; supplies the real length from its register operand at run time.
; .Ltmp0 (%r2-based) is referenced from fun0, fun1 and fun2, .Ltmp1
; (%r3-based) twice from fun2, and .Ltmp2 (%r1-based) from fun3.
; CHECK: .Ltmp2:
; CHECK-NEXT: xc 0(1,%r1), 0(%r1)
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: xc 0(1,%r2), 0(%r2)
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: xc 0(1,%r3), 0(%r3)

; Declarations of the memset intrinsic variants called above, taking the
; length as i64 and as i32 respectively.
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)