Second try at A-Wadhwani's https://reviews.llvm.org/D132096, which was reverted. The original patch had three issues: * https://reviews.llvm.org/D134032, which bjope kindly fixed. That patch is merged into this one. * [GHI #57796](https://github.com/llvm/llvm-project/issues/57796). Fixed and added a test. * [GHI #57821](https://github.com/llvm/llvm-project/issues/57821). I believe this is undefined behavior that is not the fault of the original patch. Please see the issue for more details. Original diff summary: This patch adds additional vector types to be considered when doing promotion in SROA, based on the types of the store and load slices. This provides more promotion opportunities, by potentially using an optimal "intermediate" vector type. For example, the following code would currently not be promoted to a vector, since `__m128i` is a `<2 x i64>` vector. ``` __m128i packfoo0(int a, int b, int c, int d) { int r[4] = {a, b, c, d}; __m128i rm; std::memcpy(&rm, r, sizeof(rm)); return rm; } ``` ``` packfoo0(int, int, int, int): mov dword ptr [rsp - 24], edi mov dword ptr [rsp - 20], esi mov dword ptr [rsp - 16], edx mov dword ptr [rsp - 12], ecx movaps xmm0, xmmword ptr [rsp - 24] ret ``` By also considering the types of the elements, we could find that the `<4 x i32>` type would be valid for promotion, hence removing the memory accesses for this function. In other words, we can explore additional vector types that have the same size but different element types, derived from the load and store instructions in the Slices, which can provide more promotion opportunities. Additionally, the step for removing duplicate elements from the `CandidateTys` vector was not using an equality comparator, which has been fixed. Differential Revision: https://reviews.llvm.org/D143225
47 lines
2.1 KiB
LLVM
47 lines
2.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
|
|
; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
|
|
|
|
; Type of both allocas in @foo: a struct whose only member is %union.anon.
%struct.Value = type { %union.anon }
|
|
; %union.anon holds a single <32 x i8> member.
%union.anon = type { <32 x i8> }
|
|
|
|
; Destination of the final x86_mmx store in @foo.
@A = dso_local global i64 0, align 8
|
|
|
|
; Make sure that SROA does not crash when dealing with an invalid vector
|
|
; element type.
|
|
; @foo copies a <32 x i8> value into the %ref.tmp alloca and reads it back as
; x86_mmx. The expected output below has no %ref.tmp alloca: the reload becomes
; a shufflevector of lanes 0-7 followed by a bitcast from <8 x i8> to x86_mmx.
define void @foo() {
|
|
; CHECK-LABEL: @foo(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[REF_TMP_I:%.*]] = alloca [[STRUCT_VALUE:%.*]], align 32
|
|
; CHECK-NEXT: call void @value_create(ptr sret([[STRUCT_VALUE]]) align 32 [[REF_TMP_I]])
|
|
; CHECK-NEXT: [[CALL_I:%.*]] = call align 32 ptr @value_set_type(ptr align 32 [[REF_TMP_I]])
|
|
; CHECK-NEXT: [[TMP0:%.*]] = load <32 x i8>, ptr [[CALL_I]], align 32
|
|
; CHECK-NEXT: [[REF_TMP_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <32 x i8> [[TMP0]], <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[REF_TMP_SROA_0_0_VEC_EXTRACT]] to x86_mmx
|
|
; CHECK-NEXT: [[TMP2:%.*]] = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx [[TMP1]], i8 0)
|
|
; CHECK-NEXT: store x86_mmx [[TMP2]], ptr @A, align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; This alloca is passed to external calls and is still present in the expected output.
%ref.tmp.i = alloca %struct.Value, align 32
|
|
; This alloca is only accessed by the <32 x i8> store and the x86_mmx load below.
%ref.tmp = alloca %struct.Value, align 32
|
|
call void @value_create(ptr sret(%struct.Value) align 32 %ref.tmp.i)
|
|
%call.i = call align 32 ptr @value_set_type(ptr align 32 %ref.tmp.i)
|
|
%0 = load <32 x i8>, ptr %call.i, align 32
|
|
; Write the full 32-element vector into %ref.tmp ...
store <32 x i8> %0, ptr %ref.tmp, align 32
|
|
; ... then reload from the same address with a different type (x86_mmx).
%1 = load x86_mmx, ptr %ref.tmp, align 32
|
|
%2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0)
|
|
store x86_mmx %2, ptr @A, align 8
|
|
ret void
|
|
}
|
|
|
|
; Intrinsic applied to the reloaded x86_mmx value in @foo; its i8 operand is marked immarg.
declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8 immarg)
|
|
|
|
; External callee that receives the %ref.tmp.i alloca through an sret pointer in @foo.
declare dso_local void @value_create(ptr sret(%struct.Value) align 32)
|
|
|
|
; External callee whose returned pointer is the source of the <32 x i8> load in @foo.
declare dso_local align 32 ptr @value_set_type(ptr align 32)
|
|
|
|
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
|
; CHECK-MODIFY-CFG: {{.*}}
|
|
; CHECK-PRESERVE-CFG: {{.*}}
|