Prior to this change, `SelectionDAGBuilder` was producing `SDNode`s of the form: `f32 = extract_vector_elt <1 x bfloat|half>, i32 0` when lowering phis of `<1 x bfloat|half>` and running on a target that promotes this type to `f32` (like some x86 or AMDGPU targets). This construct is invalid since this type of node only allows type extensions for integer types. It went unnoticed because the `extract_vector_elt` node is later broken down into a `bitcast` followed by `bf16_to_fp|fp_extend`. However, when the argument of the phi is a constant we were crashing because the existing code would try to constant fold this `extract_vector_elt` into an `any_ext`. This patch fixes this by using a proper decomposition for `<1 x bfloat|half>`:
```
bfloat|half = bitcast <1 x bfloat|half>
float = fp_extend bfloat|half
```
This change should be NFC for the non-constant-folding cases and fix the SDISel crashes (reported in https://github.com/llvm/llvm-project/issues/94449) for the folding cases. Note: The change on the arm test is a missing fp16 to f32 constant folding exposed by this patch. I'll push a separate improvement for that.
115 lines
3.8 KiB
LLVM
115 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
; RUN: llc -global-isel=0 -mcpu=generic -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
|
|
|
|
; For all these tests we disable optimizations through function attributes
|
|
; because the code we are exercising here needs phis and we want to keep the
|
|
; IR small.
|
|
|
|
; This code used to crash in SDISel because bf16 was promoted to f32 through
|
|
; a `f32 = extract_vector_elt <1 x bf16>, i32 0`, which is illegal.
|
|
; The invalid SDNode, and thus the crash, was only exposed by the constant
|
|
; folding.
|
|
; Phi of <1 x bfloat> whose only incoming value is the constant
; zeroinitializer; lane 1 of the resulting <2 x bfloat> is stored to %dst.
; The expected-assembly lines are autogenerated (see the NOTE at the top of
; this file) and must be regenerated rather than edited by hand.
define void @phi_vec1bf16_to_f32_with_const_folding(ptr %dst) #0 {
|
|
; CHECK-LABEL: phi_vec1bf16_to_f32_with_const_folding:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; %rbx is callee-saved; it is used to keep %dst (%rdi) live across the libcall.
; CHECK-NEXT: pushq %rbx
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: .cfi_offset %rbx, -16
|
|
; CHECK-NEXT: movq %rdi, %rbx
|
|
; CHECK-NEXT: jmp LBB0_1
|
|
; CHECK-NEXT: LBB0_1: ## %bb
|
|
; Zeroes %xmm0 -- presumably the phi's zero constant in its promoted f32 form.
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
; Libcall that converts the f32 in %xmm0 back to bf16 before the store.
; CHECK-NEXT: callq ___truncsfbf2
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; 16-bit store at byte offset 2, i.e. lane 1 of the <2 x bfloat> at %dst.
; CHECK-NEXT: movw %ax, 2(%rbx)
|
|
; CHECK-NEXT: popq %rbx
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
br label %bb
|
|
|
|
bb:
|
|
; Single-predecessor phi with a constant operand: the case that goes through
; constant folding when the phi operand is lowered.
%phi = phi <1 x bfloat> [ zeroinitializer, %entry ]
|
|
; Mask <0, 1> over two <1 x bfloat> inputs: lane 0 is taken from the poison
; operand, lane 1 is the single element of %phi.
%res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
|
|
store <2 x bfloat> %res, ptr %dst
|
|
ret void
|
|
}
|
|
; Same as phi_vec1bf16_to_f32_with_const_folding but without the constant
|
|
; folding.
|
|
; This test exercises the same invalid SDNode, but it happened to work by
|
|
; accident before. Here we make sure the fix also works as expected in the
|
|
; non-constant folding case.
|
|
; Phi of <1 x bfloat> whose only incoming value is loaded from %src (not a
; constant); lane 1 of the resulting <2 x bfloat> is stored to %dst.
; The expected-assembly lines are autogenerated (see the NOTE at the top of
; this file) and must be regenerated rather than edited by hand.
define void @phi_vec1bf16_to_f32(ptr %src, ptr %dst) #0 {
|
|
; CHECK-LABEL: phi_vec1bf16_to_f32:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; %rbx is callee-saved; it is used to keep %dst (%rsi) live across the libcall.
; CHECK-NEXT: pushq %rbx
|
|
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
|
; CHECK-NEXT: .cfi_offset %rbx, -16
|
|
; CHECK-NEXT: movq %rsi, %rbx
|
|
; Load the 16-bit bf16 value and shift it into the upper half of a 32-bit
; word, then move it to %xmm0: the bf16 -> f32 extension done on raw bits.
; CHECK-NEXT: movzwl (%rdi), %eax
|
|
; CHECK-NEXT: shll $16, %eax
|
|
; CHECK-NEXT: movd %eax, %xmm0
|
|
; CHECK-NEXT: jmp LBB1_1
|
|
; CHECK-NEXT: LBB1_1: ## %bb
|
|
; Libcall that converts the f32 in %xmm0 back to bf16 before the store.
; CHECK-NEXT: callq ___truncsfbf2
|
|
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; 16-bit store at byte offset 2, i.e. lane 1 of the <2 x bfloat> at %dst.
; CHECK-NEXT: movw %ax, 2(%rbx)
|
|
; CHECK-NEXT: popq %rbx
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
%input = load <1 x bfloat>, ptr %src
|
|
br label %bb
|
|
|
|
bb:
|
|
; Single-predecessor phi with a non-constant operand (no constant folding).
%phi = phi <1 x bfloat> [ %input, %entry ]
|
|
; Mask <0, 1> over two <1 x bfloat> inputs: lane 0 is taken from the poison
; operand, lane 1 is the single element of %phi.
%res = shufflevector <1 x bfloat> poison, <1 x bfloat> %phi, <2 x i32> <i32 0, i32 1>
|
|
store <2 x bfloat> %res, ptr %dst
|
|
ret void
|
|
}
|
|
|
|
|
|
; Half type is legal on x86 so nothing special here, but it
|
|
; doesn't hurt to be thorough.
|
|
; Phi of <1 x half> whose only incoming value is the constant zeroinitializer;
; lane 1 of the resulting <2 x half> is stored to %dst.
; The expected-assembly lines are autogenerated (see the NOTE at the top of
; this file) and must be regenerated rather than edited by hand.
define void @phi_vec1half_with_const_folding(ptr %dst) #0 {
|
|
; CHECK-LABEL: phi_vec1half_with_const_folding:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; Zeroes %xmm0, which carries the phi's zero constant into %bb.
; CHECK-NEXT: xorps %xmm0, %xmm0
|
|
; CHECK-NEXT: jmp LBB2_1
|
|
; CHECK-NEXT: LBB2_1: ## %bb
|
|
; No conversion libcall is expected here: the low 16 bits of %xmm0 are
; extracted and stored directly.
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; 16-bit store at byte offset 2, i.e. lane 1 of the <2 x half> at %dst.
; CHECK-NEXT: movw %ax, 2(%rdi)
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
br label %bb
|
|
|
|
bb:
|
|
; Single-predecessor phi with a constant operand.
%phi = phi <1 x half> [ zeroinitializer, %entry ]
|
|
; Mask <0, 1> over two <1 x half> inputs: lane 0 is taken from the poison
; operand, lane 1 is the single element of %phi.
%res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
|
|
store <2 x half> %res, ptr %dst
|
|
ret void
|
|
}
|
|
|
|
; Half type is legal on x86 so nothing special here, but it
|
|
; doesn't hurt to be thorough.
|
|
; Same as phi_vec1half_with_const_folding but without the constant folding.
|
|
; Phi of <1 x half> whose only incoming value is loaded from %src (not a
; constant); lane 1 of the resulting <2 x half> is stored to %dst.
; The expected-assembly lines are autogenerated (see the NOTE at the top of
; this file) and must be regenerated rather than edited by hand.
define void @phi_vec1half(ptr %src, ptr %dst) #0 {
|
|
; CHECK-LABEL: phi_vec1half:
|
|
; CHECK: ## %bb.0: ## %entry
|
|
; Loads the 16-bit half from %src into word 0 of %xmm0.
; CHECK-NEXT: pinsrw $0, (%rdi), %xmm0
|
|
; CHECK-NEXT: jmp LBB3_1
|
|
; CHECK-NEXT: LBB3_1: ## %bb
|
|
; No conversion libcall is expected here: the low 16 bits of %xmm0 are
; extracted and stored directly.
; CHECK-NEXT: pextrw $0, %xmm0, %eax
|
|
; 16-bit store at byte offset 2, i.e. lane 1 of the <2 x half> at %dst.
; CHECK-NEXT: movw %ax, 2(%rsi)
|
|
; CHECK-NEXT: retq
|
|
entry:
|
|
%input = load <1 x half>, ptr %src
|
|
br label %bb
|
|
|
|
bb:
|
|
; Single-predecessor phi with a non-constant operand (no constant folding).
%phi = phi <1 x half> [ %input, %entry ]
|
|
; Mask <0, 1> over two <1 x half> inputs: lane 0 is taken from the poison
; operand, lane 1 is the single element of %phi.
%res = shufflevector <1 x half> poison, <1 x half> %phi, <2 x i32> <i32 0, i32 1>
|
|
store <2 x half> %res, ptr %dst
|
|
ret void
|
|
}
|
|
|
|
; Attribute group shared by every test function in this file: optimizations
; are disabled so the phis under test are kept while the IR stays small.
; LLVM requires `noinline` to accompany `optnone`.
attributes #0 = { noinline optnone }
|