[DAGCombiner] allow more store merging for non-i8 truncated ops
This is a follow-up suggested in D86420: if we have a pair of stores in inverted order for the target's endianness, we can rotate the source bits into place.

The "be_i64_to_i16_order" test shows a limitation of the current function (which might be avoided if we integrate this function with the other cases in mergeConsecutiveStores). In the earlier "be_i64_to_i16" test, we skip the first 2 stores because we do not match the full set as consecutive or rotatable, but then we reach the last 2 stores and see that they are an inverted pair of 16-bit stores. The "be_i64_to_i16_order" test alters the program order of the stores, so we miss matching the sub-pattern.

Differential Revision: https://reviews.llvm.org/D87112
This commit is contained in:
@@ -7011,12 +7011,15 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
|
||||
|
||||
// Check if the offsets line up for the native data layout of this target.
|
||||
bool NeedBswap = false;
|
||||
bool NeedRotate = false;
|
||||
if (!checkOffsets(Layout.isLittleEndian())) {
|
||||
// Special-case: check if byte offsets line up for the opposite endian.
|
||||
// TODO: We could use rotates for 16/32-bit merge pairs.
|
||||
if (NarrowNumBits != 8 || !checkOffsets(Layout.isBigEndian()))
|
||||
if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
|
||||
NeedBswap = true;
|
||||
else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
|
||||
NeedRotate = true;
|
||||
else
|
||||
return SDValue();
|
||||
NeedBswap = true;
|
||||
}
|
||||
|
||||
SDLoc DL(N);
|
||||
@@ -7026,11 +7029,16 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
|
||||
SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
|
||||
}
|
||||
|
||||
// Before legalize we can introduce illegal bswaps which will be later
|
||||
// Before legalize we can introduce illegal bswaps/rotates which will be later
|
||||
// converted to an explicit bswap sequence. This way we end up with a single
|
||||
// store and byte shuffling instead of several stores and byte shuffling.
|
||||
if (NeedBswap)
|
||||
if (NeedBswap) {
|
||||
SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
|
||||
} else if (NeedRotate) {
|
||||
assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
|
||||
SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
|
||||
SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
|
||||
}
|
||||
|
||||
SDValue NewStore =
|
||||
DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
|
||||
|
||||
@@ -207,9 +207,8 @@ define void @le_i32_to_i16(i32 %x, i16* %p0) {
|
||||
;
|
||||
; BE-LABEL: le_i32_to_i16:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: lsr w8, w0, #16
|
||||
; BE-NEXT: strh w0, [x1]
|
||||
; BE-NEXT: strh w8, [x1, #2]
|
||||
; BE-NEXT: ror w8, w0, #16
|
||||
; BE-NEXT: str w8, [x1]
|
||||
; BE-NEXT: ret
|
||||
%sh1 = lshr i32 %x, 16
|
||||
%t0 = trunc i32 %x to i16
|
||||
@@ -228,9 +227,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) {
|
||||
;
|
||||
; BE-LABEL: le_i32_to_i16_order:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: lsr w8, w0, #16
|
||||
; BE-NEXT: strh w8, [x1, #2]
|
||||
; BE-NEXT: strh w0, [x1]
|
||||
; BE-NEXT: ror w8, w0, #16
|
||||
; BE-NEXT: str w8, [x1]
|
||||
; BE-NEXT: ret
|
||||
%sh1 = lshr i32 %x, 16
|
||||
%t0 = trunc i32 %x to i16
|
||||
@@ -244,9 +242,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) {
|
||||
define void @be_i32_to_i16(i32 %x, i16* %p0) {
|
||||
; LE-LABEL: be_i32_to_i16:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: lsr w8, w0, #16
|
||||
; LE-NEXT: strh w0, [x1, #2]
|
||||
; LE-NEXT: strh w8, [x1]
|
||||
; LE-NEXT: ror w8, w0, #16
|
||||
; LE-NEXT: str w8, [x1]
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: be_i32_to_i16:
|
||||
@@ -265,9 +262,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) {
|
||||
define void @be_i32_to_i16_order(i32 %x, i16* %p0) {
|
||||
; LE-LABEL: be_i32_to_i16_order:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: lsr w8, w0, #16
|
||||
; LE-NEXT: strh w8, [x1]
|
||||
; LE-NEXT: strh w0, [x1, #2]
|
||||
; LE-NEXT: ror w8, w0, #16
|
||||
; LE-NEXT: str w8, [x1]
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: be_i32_to_i16_order:
|
||||
@@ -528,13 +524,12 @@ define void @le_i64_to_i16_order(i64 %x, i16* %p0) {
|
||||
define void @be_i64_to_i16(i64 %x, i16* %p0) {
|
||||
; LE-LABEL: be_i64_to_i16:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: lsr x8, x0, #16
|
||||
; LE-NEXT: lsr x9, x0, #32
|
||||
; LE-NEXT: lsr x10, x0, #48
|
||||
; LE-NEXT: strh w0, [x1, #6]
|
||||
; LE-NEXT: strh w8, [x1, #4]
|
||||
; LE-NEXT: strh w9, [x1, #2]
|
||||
; LE-NEXT: strh w10, [x1]
|
||||
; LE-NEXT: lsr x8, x0, #32
|
||||
; LE-NEXT: lsr x9, x0, #48
|
||||
; LE-NEXT: ror w10, w0, #16
|
||||
; LE-NEXT: str w10, [x1, #4]
|
||||
; LE-NEXT: strh w8, [x1, #2]
|
||||
; LE-NEXT: strh w9, [x1]
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: be_i64_to_i16:
|
||||
@@ -599,8 +594,8 @@ define void @le_i64_to_i32(i64 %x, i32* %p0) {
|
||||
;
|
||||
; BE-LABEL: le_i64_to_i32:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: lsr x8, x0, #32
|
||||
; BE-NEXT: stp w0, w8, [x1]
|
||||
; BE-NEXT: ror x8, x0, #32
|
||||
; BE-NEXT: str x8, [x1]
|
||||
; BE-NEXT: ret
|
||||
%sh1 = lshr i64 %x, 32
|
||||
%t0 = trunc i64 %x to i32
|
||||
@@ -619,8 +614,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) {
|
||||
;
|
||||
; BE-LABEL: le_i64_to_i32_order:
|
||||
; BE: // %bb.0:
|
||||
; BE-NEXT: lsr x8, x0, #32
|
||||
; BE-NEXT: stp w0, w8, [x1]
|
||||
; BE-NEXT: ror x8, x0, #32
|
||||
; BE-NEXT: str x8, [x1]
|
||||
; BE-NEXT: ret
|
||||
%sh1 = lshr i64 %x, 32
|
||||
%t0 = trunc i64 %x to i32
|
||||
@@ -634,8 +629,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) {
|
||||
define void @be_i64_to_i32(i64 %x, i32* %p0) {
|
||||
; LE-LABEL: be_i64_to_i32:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: lsr x8, x0, #32
|
||||
; LE-NEXT: stp w8, w0, [x1]
|
||||
; LE-NEXT: ror x8, x0, #32
|
||||
; LE-NEXT: str x8, [x1]
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: be_i64_to_i32:
|
||||
@@ -654,8 +649,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) {
|
||||
define void @be_i64_to_i32_order(i64 %x, i32* %p0) {
|
||||
; LE-LABEL: be_i64_to_i32_order:
|
||||
; LE: // %bb.0:
|
||||
; LE-NEXT: lsr x8, x0, #32
|
||||
; LE-NEXT: stp w8, w0, [x1]
|
||||
; LE-NEXT: ror x8, x0, #32
|
||||
; LE-NEXT: str x8, [x1]
|
||||
; LE-NEXT: ret
|
||||
;
|
||||
; BE-LABEL: be_i64_to_i32_order:
|
||||
|
||||
@@ -482,9 +482,8 @@ define void @trunc_i32_to_i16(i32 %x, i16* %p) {
|
||||
define void @be_i32_to_i16(i32 %x, i16* %p0) {
|
||||
; CHECK-LABEL: be_i32_to_i16:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movw %di, 2(%rsi)
|
||||
; CHECK-NEXT: shrl $16, %edi
|
||||
; CHECK-NEXT: movw %di, (%rsi)
|
||||
; CHECK-NEXT: rorl $16, %edi
|
||||
; CHECK-NEXT: movl %edi, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%sh1 = lshr i32 %x, 16
|
||||
%t0 = trunc i32 %x to i16
|
||||
@@ -498,10 +497,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) {
|
||||
define void @be_i32_to_i16_order(i32 %x, i16* %p0) {
|
||||
; CHECK-LABEL: be_i32_to_i16_order:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: shrl $16, %eax
|
||||
; CHECK-NEXT: movw %ax, (%rsi)
|
||||
; CHECK-NEXT: movw %di, 2(%rsi)
|
||||
; CHECK-NEXT: rorl $16, %edi
|
||||
; CHECK-NEXT: movl %edi, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%sh1 = lshr i32 %x, 16
|
||||
%t0 = trunc i32 %x to i16
|
||||
@@ -589,9 +586,8 @@ define void @trunc_i64_to_i32(i64 %x, i32* %p) {
|
||||
define void @be_i64_to_i32(i64 %x, i32* %p0) {
|
||||
; CHECK-LABEL: be_i64_to_i32:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movl %edi, 4(%rsi)
|
||||
; CHECK-NEXT: shrq $32, %rdi
|
||||
; CHECK-NEXT: movl %edi, (%rsi)
|
||||
; CHECK-NEXT: rorq $32, %rdi
|
||||
; CHECK-NEXT: movq %rdi, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%sh1 = lshr i64 %x, 32
|
||||
%t0 = trunc i64 %x to i32
|
||||
@@ -605,10 +601,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) {
|
||||
define void @be_i64_to_i32_order(i64 %x, i32* %p0) {
|
||||
; CHECK-LABEL: be_i64_to_i32_order:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: shrq $32, %rax
|
||||
; CHECK-NEXT: movl %eax, (%rsi)
|
||||
; CHECK-NEXT: movl %edi, 4(%rsi)
|
||||
; CHECK-NEXT: rorq $32, %rdi
|
||||
; CHECK-NEXT: movq %rdi, (%rsi)
|
||||
; CHECK-NEXT: retq
|
||||
%sh1 = lshr i64 %x, 32
|
||||
%t0 = trunc i64 %x to i32
|
||||
|
||||
Reference in New Issue
Block a user