A bfloat select operation will currently crash, but is allowed from C. This adds handling for the operation, turning it into a FCSELHrrr if fullfp16 is present, or converting it to a FCSELSrrr if not. The FCSELSrrr is created via using INSERT_SUBREG/EXTRACT_SUBREG to convert the bf16 to a f32 and using the f32 pattern for FCSELSrrr. (I originally attempted to do this via a tablegen pattern, but it appears that the nzcv glue is places onto the wrong node, causing it to be forgotten and incorrect scheduling to be emitted). The FCSELSrrr can also be used for fp16 selects when +fullfp16 is not present, which helps avoid an unnecessary promotion to f32. Differential Revision: https://reviews.llvm.org/D131253
65 lines
2.5 KiB
LLVM
65 lines
2.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-BASE
|
|
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-FP16
|
|
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+bf16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-BASE
|
|
; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+bf16,+fullfp16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-FP16
|
|
|
|
; Select between two bfloat arguments using the i1 argument %c as the condition.
; Expected output below: without +fullfp16 the fcsel is performed on the
; s-register views of the operands; with +fullfp16 it is performed directly on
; the h registers.
define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) {
; CHECK-BASE-LABEL: test_select:
; CHECK-BASE: // %bb.0:
; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-BASE-NEXT: cmp w0, #0
; CHECK-BASE-NEXT: // kill: def $h1 killed $h1 def $s1
; CHECK-BASE-NEXT: fcsel s0, s0, s1, ne
; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-BASE-NEXT: ret
;
; CHECK-FP16-LABEL: test_select:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: cmp w0, #0
; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne
; CHECK-FP16-NEXT: ret
  %r = select i1 %c, bfloat %a, bfloat %b
  ret bfloat %r
}
|
|
|
|
; Select between two bfloat arguments where the condition is a floating-point
; compare (fcmp une) of the two float arguments %c and %d.
; Expected output below: an fcmp on s2/s3 feeds the fcsel, which uses
; s registers without +fullfp16 and h registers with it.
define bfloat @test_select_fcc(bfloat %a, bfloat %b, float %c, float %d) {
; CHECK-BASE-LABEL: test_select_fcc:
; CHECK-BASE: // %bb.0:
; CHECK-BASE-NEXT: fcmp s2, s3
; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-BASE-NEXT: // kill: def $h1 killed $h1 def $s1
; CHECK-BASE-NEXT: fcsel s0, s0, s1, ne
; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-BASE-NEXT: ret
;
; CHECK-FP16-LABEL: test_select_fcc:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcmp s2, s3
; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne
; CHECK-FP16-NEXT: ret
  %cc = fcmp une float %c, %d
  %r = select i1 %cc, bfloat %a, bfloat %b
  ret bfloat %r
}
|
|
|
|
; Select between two bfloat arguments where the condition is an integer
; compare (icmp ne) of the two i32 arguments %c and %d.
; Expected output below: a cmp on w0/w1 feeds the fcsel, which uses
; s registers without +fullfp16 and h registers with it.
define bfloat @test_select_icc(bfloat %a, bfloat %b, i32 %c, i32 %d) {
; CHECK-BASE-LABEL: test_select_icc:
; CHECK-BASE: // %bb.0:
; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-BASE-NEXT: cmp w0, w1
; CHECK-BASE-NEXT: // kill: def $h1 killed $h1 def $s1
; CHECK-BASE-NEXT: fcsel s0, s0, s1, ne
; CHECK-BASE-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-BASE-NEXT: ret
;
; CHECK-FP16-LABEL: test_select_icc:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: cmp w0, w1
; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne
; CHECK-FP16-NEXT: ret
  %cc = icmp ne i32 %c, %d
  %r = select i1 %cc, bfloat %a, bfloat %b
  ret bfloat %r
}
|