Add a tuning feature to fix https://github.com/llvm/llvm-project/issues/84182: generate vpdpwssd instead of the vpmaddwd + vpaddd sequence.
13 lines
577 B
LLVM
13 lines
577 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s

; Both RUN lines share the default CHECK prefix, so the same output is required
; when targeting -mcpu=znver4 and when enabling +avx512vnni,+fast-dpwssd
; explicitly: the 512-bit vpdpwssd intrinsic call must be emitted as a single
; vpdpwssd instruction followed directly by the return (i.e. not expanded into
; a vpmaddwd + vpaddd pair).
define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {
; CHECK-LABEL: vpdpwssd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %4 = tail call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  ret <16 x i32> %4
}