Files
clang-p2996/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
Changpeng Fang 3564666fe1 [AMDGPU]: Fix type signatures for wmma intrinsics, NFC (#80087)
Make the wmma intrinsic type signatures to be canonical. We need
a type signature as long as the type is not fixed. However, when an
argument's type matches a previous argument's type, we do not need the
signature for this argument.

 This patch fixes three general cases:
  1. add missing signatures
  2. remove signatures for matching arguments
3. reorer the signatures -- return type signature should always appear
first
2024-01-30 23:17:35 -08:00

23 lines
1.0 KiB
LLVM

; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s
declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half>, <16 x half>, <4 x float>)
; Make sure we don't crash when trying to select modifiers in SelectVOP3PMods.
define amdgpu_cs void @xyz () {
; CHECK-LABEL: xyz:
; CHECK: v_wmma_f32_16x16x16_f16 v[0:3], v[0:7], v[0:7], v[0:3]
.entry:
br label %loop
loop:
%ld = load <8 x float>, ptr addrspace(5) null, align 32
%in_shuffle = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> undef, <16 x half> undef, <4 x float> %in_shuffle)
%out_shuffle = shufflevector <4 x float> %wmma, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
br i1 false, label %.exit, label %loop
.exit:
ret void
}