Files
clang-p2996/llvm/test/CodeGen/AArch64/pr90936.ll
Craig Topper ef84452571 [DAGCombiner] Be more careful about looking through extends and truncates in mergeTruncStores. (#91375)
Previously we recursively looked through extends and truncates on both
SourceValue and WideVal.

SourceValue is the largest source found for each of the stores we are
combining. WideVal is the source for the current store.

Previously we could look through a (zext (trunc X)) pair and incorrectly
conclude that X was a good source.

I think we could also look through a zext on one store and a sext on
another store and arbitrarily pick one of the extends as the final
source.

With this patch we only look through one level of extend or truncate.
And we don't look through extends/truncs on both SourceValue and WideVal
at the same time.

This may lose some optimization cases, but keeps everything we had tests
for.

Fixes #90936.
2024-05-07 21:17:50 -07:00

50 lines
1.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
; Regression test for issue #90936: the two byte stores below must stay
; separate. Byte 0 is the low 8 bits of %arg, but byte 1 is produced by a shift
; performed on an i14 truncation of %arg, so it holds only bits 8-13 of %arg
; (bits 14-15 are dropped). Storing %arg as a single i16 would write the wrong
; high byte, so the expected output extracts a 6-bit field and stores it on
; its own.
define void @f(i16 %arg, ptr %arg1) {
; CHECK-LABEL: f:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: ubfx w8, w0, #8, #6
; CHECK-NEXT: strb w0, [x1]
; CHECK-NEXT: strb w8, [x1, #1]
; CHECK-NEXT: ret
bb:
; Byte 0: low 8 bits of %arg.
%i = trunc i16 %arg to i8
; Only 14 bits survive this truncation, so the shift below yields bits 8-13.
%i2 = trunc i16 %arg to i14
%i3 = lshr i14 %i2, 8
store i8 %i, ptr %arg1, align 1
; Byte 1: the 6-bit field, with its top two bits zero.
%i4 = getelementptr i8, ptr %arg1, i64 1
%i5 = trunc i14 %i3 to i8
store i8 %i5, ptr %i4, align 1
ret void
}
; Regression test for the (zext (trunc X)) case: four adjacent byte stores that
; look like a byte-wise store of %arg, except that the last byte is derived
; from the low byte of %arg rather than from %arg itself. The expected output
; keeps four separate byte stores, with a zero stored at offset 3.
define void @g(i32 %arg, ptr %arg1) {
; CHECK-LABEL: g:
; CHECK: // %bb.0: // %bb
; CHECK-NEXT: lsr w8, w0, #8
; CHECK-NEXT: lsr w9, w0, #16
; CHECK-NEXT: strb w0, [x1]
; CHECK-NEXT: strb wzr, [x1, #3]
; CHECK-NEXT: strb w8, [x1, #1]
; CHECK-NEXT: strb w9, [x1, #2]
; CHECK-NEXT: ret
bb:
; Byte 0: bits 0-7 of %arg.
%i = trunc i32 %arg to i8
store i8 %i, ptr %arg1, align 1
; Byte 1: bits 8-15 of %arg.
%i2 = lshr i32 %arg, 8
%i3 = trunc i32 %i2 to i8
%i4 = getelementptr i8, ptr %arg1, i64 1
store i8 %i3, ptr %i4, align 1
; Byte 2: bits 16-23 of %arg.
%i5 = lshr i32 %arg, 16
%i6 = trunc i32 %i5 to i8
%i7 = getelementptr i8, ptr %arg1, i64 2
store i8 %i6, ptr %i7, align 1
; Byte 3: the low byte zero-extended back to i32 and shifted right by 24 is
; always zero. It is not bits 24-31 of %arg, so %arg is not a valid source for
; a single merged 32-bit store.
%i8 = zext i8 %i to i32
%i9 = lshr i32 %i8, 24
%i10 = getelementptr i8, ptr %arg1, i64 3
%i11 = trunc i32 %i9 to i8
store i8 %i11, ptr %i10, align 1
ret void
}