Files
clang-p2996/llvm/test/CodeGen/SystemZ/memset-05.ll
Jonas Paulsson 00baad35b2 [SystemZ] Bugfix and refactorization of mem-mem operations
This patch fixes the bug that consisted of treating variable / immediate
length mem operations (such as memcpy, memset, ...) differently. The variable
length case needs to have the length minus 1 passed due to the use of EXRL
target instructions. However, the DAGCombiner can convert a register length
argument into a constant one, and whenever that happened one byte too little
would end up being performed.

This is also a refactorization by reducing the number of opcodes and variants
involved. For any opcode (variable or constant length), only the length minus
one is passed on to the ISD node. The rest of the logic is now instead
handled during isel pseudo expansion.

Review: Ulrich Weigand

Differential Revision: https://reviews.llvm.org/D111729
2021-10-14 10:37:33 +02:00

255 lines
9.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test memset 0 with variable length
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; fun0: zero %Len bytes at %Addr, where the i64 length is only known at run time.
; Shape of the expected assembly:
;   aghi / cgibe    - form Len-1 in %r3 and return right away when it is -1,
;                     i.e. when Len was 0.
;   srlg / cgije    - %r0 = (Len-1) >> 8 is the number of 256-byte blocks; the
;                     loop is skipped when that count is 0.
;   xc / la / brctg - clear 256 bytes per iteration, advancing %r2 by 256.
;   exrl            - execute the xc at .Ltmp0 with %r3 supplying the remaining
;                     length (the length field holds length minus one).
define void @fun0(i8* %Addr, i64 %Len) {
; CHECK-LABEL: fun0:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r3, -1
; CHECK-NEXT: cgibe %r3, -1, 0(%r14)
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: srlg %r0, %r3, 8
; CHECK-NEXT: cgije %r0, 0, .LBB0_3
; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: brctg %r0, .LBB0_2
; CHECK-NEXT: .LBB0_3:
; CHECK-NEXT: exrl %r3, .Ltmp0
; CHECK-NEXT: br %r14
tail call void @llvm.memset.p0i8.i64(i8* %Addr, i8 0, i64 %Len, i1 false)
ret void
}
; fun1: same as fun0 but with an i32 length. The expected assembly first
; zero-extends the length from %r3 into %r1 (llgfr) and then follows the same
; sequence as fun0: length minus one, early return for a zero length, a loop
; over 256-byte blocks, and an exrl of the shared .Ltmp0 target for the rest.
define void @fun1(i8* %Addr, i32 %Len) {
; CHECK-LABEL: fun1:
; CHECK: # %bb.0:
; CHECK-NEXT: llgfr %r1, %r3
; CHECK-NEXT: aghi %r1, -1
; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
; CHECK-NEXT: .LBB1_1:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: cgije %r0, 0, .LBB1_3
; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: brctg %r0, .LBB1_2
; CHECK-NEXT: .LBB1_3:
; CHECK-NEXT: exrl %r1, .Ltmp0
; CHECK-NEXT: br %r14
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
ret void
}
; Test that identical target instructions get reused.
; fun2: three back-to-back identical memsets of %Len zero bytes at %Addr.
; The expected assembly contains three expansions. The first two work on a
; copy of the address in %r3 and both use the exrl target .Ltmp1; the last
; one works on %r2 directly and uses .Ltmp0, the same target label that fun0
; and fun1 refer to. The length minus one is computed once, in %r1, and is
; re-tested against -1 before each expansion.
define void @fun2(i8* %Addr, i32 %Len) {
; CHECK-LABEL: fun2:
; CHECK: # %bb.0:
; CHECK-NEXT: llgfr %r1, %r3
; CHECK-NEXT: aghi %r1, -1
; CHECK-NEXT: cgije %r1, -1, .LBB2_5
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: cgije %r0, 0, .LBB2_4
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
; CHECK-NEXT: la %r3, 256(%r3)
; CHECK-NEXT: brctg %r0, .LBB2_3
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: exrl %r1, .Ltmp1
; CHECK-NEXT: .LBB2_5:
; CHECK-NEXT: cgije %r1, -1, .LBB2_10
; CHECK-NEXT: # %bb.6:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: cgije %r0, 0, .LBB2_9
; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: lgr %r3, %r2
; CHECK-NEXT: .LBB2_8: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
; CHECK-NEXT: la %r3, 256(%r3)
; CHECK-NEXT: brctg %r0, .LBB2_8
; CHECK-NEXT: .LBB2_9:
; CHECK-NEXT: exrl %r1, .Ltmp1
; CHECK-NEXT: .LBB2_10:
; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
; CHECK-NEXT: .LBB2_11:
; CHECK-NEXT: srlg %r0, %r1, 8
; CHECK-NEXT: cgije %r0, 0, .LBB2_13
; CHECK-NEXT: .LBB2_12: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
; CHECK-NEXT: la %r2, 256(%r2)
; CHECK-NEXT: brctg %r0, .LBB2_12
; CHECK-NEXT: .LBB2_13:
; CHECK-NEXT: exrl %r1, .Ltmp0
; CHECK-NEXT: br %r14
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
ret void
}
; Test that a memset to nullptr compiles.
; fun3: variable-length memset whose destination is the null pointer. The
; length arrives in %r2 (it is the only argument), and the expected assembly
; materializes the address 0 in %r1 (lghi) before the 256-byte loop. The
; remainder is handled by an exrl of .Ltmp2, whose target xc is based on %r1.
define void @fun3(i64 %Len) {
; CHECK-LABEL: fun3:
; CHECK: # %bb.0:
; CHECK-NEXT: aghi %r2, -1
; CHECK-NEXT: cgibe %r2, -1, 0(%r14)
; CHECK-NEXT: .LBB3_1:
; CHECK-NEXT: srlg %r0, %r2, 8
; CHECK-NEXT: lghi %r1, 0
; CHECK-NEXT: cgije %r0, 0, .LBB3_3
; CHECK-NEXT: .LBB3_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: xc 0(256,%r1), 0(%r1)
; CHECK-NEXT: la %r1, 256(%r1)
; CHECK-NEXT: brctg %r0, .LBB3_2
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: exrl %r2, .Ltmp2
; CHECK-NEXT: br %r14
call void @llvm.memset.p0i8.i64(i8* null, i8 0, i64 %Len, i1 false)
ret void
}
; Test that a memset with a length argument that DAGCombiner will convert
; into a constant gets the correct number of bytes set.
@Data = external hidden constant [1024 x i8], align 2
; @Data is an external 1024-byte array. fun4 to fun8 build their memset length
; as a constant expression: the difference of two addresses derived from @Data.
;
; fun4: the length is (&Data[1][0] + 1) - (&Data[0][35] + 1) = 1024 - 35 = 989.
; The expected assembly clears exactly 989 bytes starting at Data+35 with
; straight-line xc instructions of 256 + 256 + 256 + 221 bytes, with no loop
; and no exrl. The same constant is also turned into a pointer (gep from null)
; and stored to an undef address; this is the mvghi with immediate 989 in the
; expected output.
define void @fun4() {
; CHECK-LABEL: fun4:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(256,%r1), 35(%r1)
; CHECK-NEXT: xc 291(256,%r1), 291(%r1)
; CHECK-NEXT: xc 547(256,%r1), 547(%r1)
; CHECK-NEXT: xc 803(221,%r1), 803(%r1)
; CHECK-NEXT: mvghi 0(%r1), 989
; CHECK-NEXT: br %r14
call void @llvm.memset.p0i8.i64(
i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
i8 0,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 0) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 0, i64 35) to i64), i64 1)),
i1 false)
%i11 = getelementptr i8, i8* null,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 0) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 0, i64 35) to i64), i64 1))
store i8* %i11, i8** undef, align 8
ret void
}
; The same, with a resulting constant length of 0.
; fun5: both operands of the subtraction are the same address
; (&Data[1][35] + 1), so the length is 0. The expected assembly contains no
; xc and no larl at all; only the store of the constant 0 (mvghi) and the
; return remain.
define void @fun5() {
; CHECK-LABEL: fun5:
; CHECK: # %bb.0:
; CHECK-NEXT: mvghi 0(%r1), 0
; CHECK-NEXT: br %r14
call void @llvm.memset.p0i8.i64(
i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
i8 0,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
i1 false)
%i11 = getelementptr i8, i8* null,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
store i8* %i11, i8** undef, align 8
ret void
}
; The same, with a resulting constant length of 1.
; fun6: the length is (&Data[1][36] + 1) - (&Data[1][35] + 1) = 1. The
; expected assembly is a single one-byte xc at Data+35, followed by the store
; of the constant 1 (mvghi).
define void @fun6() {
; CHECK-LABEL: fun6:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(1,%r1), 35(%r1)
; CHECK-NEXT: mvghi 0(%r1), 1
; CHECK-NEXT: br %r14
call void @llvm.memset.p0i8.i64(
i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
i8 0,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 36) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
i1 false)
%i11 = getelementptr i8, i8* null,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 36) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
store i8* %i11, i8** undef, align 8
ret void
}
; The same, with a resulting constant length of 256.
; fun7: the length is (&Data[1][291] + 1) - (&Data[1][35] + 1) = 256. The
; expected assembly is exactly one xc covering 256 bytes at Data+35, with no
; second instruction for a remainder, followed by the store of 256 (mvghi).
define void @fun7() {
; CHECK-LABEL: fun7:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(256,%r1), 35(%r1)
; CHECK-NEXT: mvghi 0(%r1), 256
; CHECK-NEXT: br %r14
call void @llvm.memset.p0i8.i64(
i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
i8 0,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 291) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
i1 false)
%i11 = getelementptr i8, i8* null,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 291) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
store i8* %i11, i8** undef, align 8
ret void
}
; The same, with a resulting constant length of 257.
; fun8: the length is (&Data[1][292] + 1) - (&Data[1][35] + 1) = 257, one byte
; more than a single xc can cover. The expected assembly is a 256-byte xc at
; Data+35 followed by a one-byte xc at Data+291, then the store of 257 (mvghi).
define void @fun8() {
; CHECK-LABEL: fun8:
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, Data
; CHECK-NEXT: xc 35(256,%r1), 35(%r1)
; CHECK-NEXT: xc 291(1,%r1), 291(%r1)
; CHECK-NEXT: mvghi 0(%r1), 257
; CHECK-NEXT: br %r14
call void @llvm.memset.p0i8.i64(
i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @Data, i64 0, i64 35),
i8 0,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 292) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1)),
i1 false)
%i11 = getelementptr i8, i8* null,
i64 sub (i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 292) to i64), i64 1),
i64 add (i64 ptrtoint (i8* getelementptr inbounds ([1024 x i8],
[1024 x i8]* @Data, i64 1, i64 35) to i64), i64 1))
store i8* %i11, i8** undef, align 8
ret void
}
; Target instructions for the exrl instructions expected in the functions
; above. Each label is followed by a one-byte xc on the base register used at
; the matching exrl sites: %r1 for .Ltmp2 (fun3), %r2 for .Ltmp0 (fun0, fun1
; and the last memset of fun2), and %r3 for .Ltmp1 (first two memsets of fun2).
; CHECK: .Ltmp2:
; CHECK-NEXT: xc 0(1,%r1), 0(%r1)
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: xc 0(1,%r2), 0(%r2)
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: xc 0(1,%r3), 0(%r3)
; Declarations of the two memset intrinsic overloads called above: i64 length
; (fun0 and fun3 to fun8) and i32 length (fun1 and fun2).
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)