Files
clang-p2996/llvm/test/CodeGen/PowerPC/fast-isel-load-store.ll
Paul Robinson cf1c774d6a [FastISel] Flush local value map on ever instruction
Local values are constants or addresses that can't be folded into
the instruction that uses them. FastISel materializes these in a
"local value" area that always dominates the current insertion
point, to try to avoid materializing these values more than once
(per block).

https://reviews.llvm.org/D43093 added code to sink these local
value instructions to their first use, which has two beneficial
effects. One, it is likely to avoid some unnecessary spills and
reloads; two, it allows us to attach the debug location of the
user to the local value instruction. The latter effect can
improve the debugging experience for debuggers with a "set next
statement" feature, such as the Visual Studio debugger and PS4
debugger, because instructions to set up constants for a given
statement will be associated with the appropriate source line.

There are also some constants (primarily addresses) that could be
produced by no-op casts or GEP instructions; the main difference
from "local value" instructions is that these are values from
separate IR instructions, and therefore could have multiple users
across multiple basic blocks. D43093 avoided sinking these, even
though they were emitted to the same "local value" area as the
other instructions. The patch comment for D43093 states:

  Local values may also be used by no-op casts, which adds the
  register to the RegFixups table. Without reversing the RegFixups
  map direction, we don't have enough information to sink these
  instructions.

This patch undoes most of D43093, and instead flushes the local
value map after(*) every IR instruction, using that instruction's
debug location. This avoids sometimes incorrect locations used
previously, and emits instructions in a more natural order.

This does mean materialized values are not re-used across IR
instruction boundaries; however, only about 5% of those values
were reused in an experimental self-build of clang.

(*) Actually, just prior to the next instruction. It seems like
it would be cleaner the other way, but I was having trouble
getting that to work.

Differential Revision: https://reviews.llvm.org/D91734
2020-11-25 13:05:00 -05:00

221 lines
4.8 KiB
LLVM

; RUN: llc -relocation-model=static < %s -O0 -verify-machineinstrs -fast-isel -fast-isel-abort=1 -mattr=-vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
; RUN: llc -relocation-model=static < %s -O0 -verify-machineinstrs -fast-isel -fast-isel-abort=1 -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=VSX
; RUN: llc -relocation-model=static < %s -O0 -verify-machineinstrs -fast-isel -fast-isel-abort=1 -mattr=spe -mtriple=powerpc-unknown-linux-gnu -mcpu=e500 | FileCheck %s --check-prefix=SPE
; This test verifies that load/store instructions are properly generated,
; and that they pass MI verification.
@a = global i8 1, align 1
@b = global i16 2, align 2
@c = global i32 4, align 4
@d = global i64 8, align 8
@e = global float 1.25, align 4
@f = global double 3.5, align 8
%struct.s = type<{ i8, i32 }>
%struct.t = type<{ i8, i64 }>
@g = global %struct.s <{ i8 1, i32 2 }>, align 1
@h = global %struct.t <{ i8 1, i64 2 }>, align 1
@i = common global [8192 x i64] zeroinitializer, align 8
; load
define i8 @t1() nounwind {
; ELF64-LABEL: t1:
%1 = load i8, i8* @a, align 1
; ELF64: lbz
%2 = add nsw i8 %1, 1
; ELF64: addi
ret i8 %2
}
define i16 @t2() nounwind {
; ELF64-LABEL: t2:
%1 = load i16, i16* @b, align 2
; ELF64: lhz
%2 = add nsw i16 %1, 1
; ELF64: addi
ret i16 %2
}
define i32 @t3() nounwind {
; ELF64-LABEL: t3:
%1 = load i32, i32* @c, align 4
; ELF64: lwz
%2 = add nsw i32 %1, 1
; ELF64: addi
ret i32 %2
}
define i64 @t4() nounwind {
; ELF64-LABEL: t4:
%1 = load i64, i64* @d, align 4
; ELF64: ld
%2 = add nsw i64 %1, 1
; ELF64: addi
ret i64 %2
}
define float @t5() nounwind {
; ELF64-LABEL: t5:
; SPE-LABEL: t5:
%1 = load float, float* @e, align 4
; ELF64: lfs
; SPE: lwz
%2 = fadd float %1, 1.0
; ELF64: fadds
; SPE: efsadd
ret float %2
}
define double @t6() nounwind {
; ELF64-LABEL: t6:
; SPE-LABEL: t6:
%1 = load double, double* @f, align 8
; ELF64: lfd
; VSX: lxsdx
; SPE: evldd
%2 = fadd double %1, 1.0
; ELF64: fadd
; VSX: xsadddp
; SPE: efdadd
ret double %2
}
; store
define void @t7(i8 %v) nounwind {
; ELF64-LABEL: t7:
%1 = add nsw i8 %v, 1
store i8 %1, i8* @a, align 1
; ELF64: addi
; ELF64: addis
; ELF64: addi
; ELF64: stb
ret void
}
define void @t8(i16 %v) nounwind {
; ELF64-LABEL: t8:
%1 = add nsw i16 %v, 1
store i16 %1, i16* @b, align 2
; ELF64: addi
; ELF64: addis
; ELF64: addi
; ELF64: sth
ret void
}
define void @t9(i32 %v) nounwind {
; ELF64-LABEL: t9:
%1 = add nsw i32 %v, 1
store i32 %1, i32* @c, align 4
; ELF64: addi
; ELF64: addis
; ELF64: addi
; ELF64: stw
ret void
}
define void @t10(i64 %v) nounwind {
; ELF64-LABEL: t10:
%1 = add nsw i64 %v, 1
store i64 %1, i64* @d, align 4
; ELF64: addi
; ELF64: addis
; ELF64: addi
; ELF64: std
ret void
}
define void @t11(float %v) nounwind {
; ELF64-LABEL: t11:
; SPE: t11:
%1 = fadd float %v, 1.0
store float %1, float* @e, align 4
; ELF64: fadds
; ELF64: stfs
; SPE: efsadd
; SPE: stw
ret void
}
define void @t12(double %v) nounwind {
; ELF64-LABEL: t12:
; SPE-LABEL: t12:
%1 = fadd double %v, 1.0
store double %1, double* @f, align 8
; ELF64: fadd
; ELF64: stfd
; VSX: xsadddp
; VSX: stxsdx
; SPE: efdadd
; SPE: evstdd
ret void
}
;; lwa requires an offset divisible by 4, so we need lwax here.
define i64 @t13() nounwind {
; ELF64-LABEL: t13:
%1 = load i32, i32* getelementptr inbounds (%struct.s, %struct.s* @g, i32 0, i32 1), align 1
%2 = sext i32 %1 to i64
; ELF64: li
; ELF64: lwax
%3 = add nsw i64 %2, 1
; ELF64: addi
ret i64 %3
}
;; ld requires an offset divisible by 4, so we need ldx here.
define i64 @t14() nounwind {
; ELF64-LABEL: t14:
%1 = load i64, i64* getelementptr inbounds (%struct.t, %struct.t* @h, i32 0, i32 1), align 1
; ELF64: li
; ELF64: ldx
%2 = add nsw i64 %1, 1
; ELF64: addi
ret i64 %2
}
;; std requires an offset divisible by 4, so we need stdx here.
define void @t15(i64 %v) nounwind {
; ELF64-LABEL: t15:
%1 = add nsw i64 %v, 1
store i64 %1, i64* getelementptr inbounds (%struct.t, %struct.t* @h, i32 0, i32 1), align 1
; ELF64: addi
; ELF64: addis
; ELF64: addi
; ELF64: li
; ELF64: stdx
ret void
}
;; ld requires an offset that fits in 16 bits, so we need ldx here.
define i64 @t16() nounwind {
; ELF64-LABEL: t16:
%1 = load i64, i64* getelementptr inbounds ([8192 x i64], [8192 x i64]* @i, i32 0, i64 5000), align 8
; ELF64: lis
; ELF64: ori
; ELF64: ldx
%2 = add nsw i64 %1, 1
; ELF64: addi
ret i64 %2
}
;; std requires an offset that fits in 16 bits, so we need stdx here.
define void @t17(i64 %v) nounwind {
; ELF64-LABEL: t17:
%1 = add nsw i64 %v, 1
store i64 %1, i64* getelementptr inbounds ([8192 x i64], [8192 x i64]* @i, i32 0, i64 5000), align 8
; ELF64: addi
; ELF64: addis
; ELF64: addi
; ELF64: lis
; ELF64: ori
; ELF64: stdx
ret void
}