Files
clang-p2996/llvm/test/CodeGen/X86/AMX/amx-configO2toO0.ll
Nikita Popov 0eb17a9d86 [X86][AMX] Update tests to use opaque pointers (NFC)
There are some codegen differences here, because the presence of
bitcasts affects AMX codegen in minor ways (the bitcasts are not
always in the input IR, but may be added by X86PreAMXConfig,
for example).

Differential Revision: https://reviews.llvm.org/D128424
2022-06-23 14:37:45 +02:00

178 lines
7.8 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
; Two zero-initialized, 16-byte-aligned 1024-byte global arrays. @test_api
; loads its input tiles from @buf or @buf2 (selected by %cond) and stores the
; final result tile back to @buf.
@buf = dso_local global [1024 x i8] zeroinitializer, align 16
@buf2 = dso_local global [1024 x i8] zeroinitializer, align 16
; test_api: AMX tile intrinsics used across an if/else diamond, compiled by
; llc at -O0 with +amx-int8 and +avx512f.
;
;   %cond  selects the source buffer: non-zero -> if.then (loads from @buf),
;          zero -> if.else (loads from @buf2).
;   %row, %col  are passed as the i16 operands of the tile intrinsics.
;
; Each arm loads three tiles; if.end merges them with phi nodes, feeds them to
; the tdpbssd intrinsic and stores the result to @buf.
;
; The assertions below are autogenerated (see the note at the top of the file)
; and should be regenerated with the update script rather than edited by hand.
; The checked output contains ldtilecfg instructions in each of the three
; blocks (if.then, if.else, if.end) and a tilerelease before the final retq.
define dso_local void @test_api(i32 %cond, i16 signext %row, i16 signext %col) nounwind {
; AVX512-LABEL: test_api:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: movq %rsp, %rbp
; AVX512-NEXT: andq $-1024, %rsp # imm = 0xFC00
; AVX512-NEXT: subq $8192, %rsp # imm = 0x2000
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovups %zmm0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movb $1, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %dx, %ax
; AVX512-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512-NEXT: movw %si, %ax
; AVX512-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill
; AVX512-NEXT: cmpl $0, %edi
; AVX512-NEXT: je .LBB0_2
; AVX512-NEXT: # %bb.1: # %if.then
; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload
; AVX512-NEXT: movl $buf, %esi
; AVX512-NEXT: movl $32, %edi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %dx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; AVX512-NEXT: tileloadd (%rsi,%rdi), %tmm0
; AVX512-NEXT: movl $64, %edi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: tilestored %tmm0, (%rsi,%rdi)
; AVX512-NEXT: movl $buf, %esi
; AVX512-NEXT: movl $32, %edi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: # implicit-def: $dl
; AVX512-NEXT: movb %dl, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; AVX512-NEXT: tileloadd (%rsi,%rdi), %tmm0
; AVX512-NEXT: movl $64, %edi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: tilestored %tmm0, (%rsi,%rdi)
; AVX512-NEXT: movl $buf, %edx
; AVX512-NEXT: movl $32, %esi
; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0
; AVX512-NEXT: movl $64, %esi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi)
; AVX512-NEXT: jmp .LBB0_3
; AVX512-NEXT: .LBB0_2: # %if.else
; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload
; AVX512-NEXT: movl $buf2, %esi
; AVX512-NEXT: movl $32, %edi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %dx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; AVX512-NEXT: tileloadd (%rsi,%rdi), %tmm0
; AVX512-NEXT: movl $64, %edi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: tilestored %tmm0, (%rsi,%rdi)
; AVX512-NEXT: movl $buf2, %esi
; AVX512-NEXT: movl $32, %edi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: # implicit-def: $dl
; AVX512-NEXT: movb %dl, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; AVX512-NEXT: tileloadd (%rsi,%rdi), %tmm0
; AVX512-NEXT: movl $64, %edi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rsi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: tilestored %tmm0, (%rsi,%rdi)
; AVX512-NEXT: movl $buf2, %edx
; AVX512-NEXT: movl $32, %esi
; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0
; AVX512-NEXT: movl $64, %esi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi)
; AVX512-NEXT: .LBB0_3: # %if.end
; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %ax # 2-byte Reload
; AVX512-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx # 2-byte Reload
; AVX512-NEXT: movl $64, %edi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: movw $8, %si
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp)
; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; AVX512-NEXT: tileloadd (%rdx,%rdi), %tmm0
; AVX512-NEXT: movabsq $64, %rdx
; AVX512-NEXT: tilestored %tmm0, 1024(%rsp,%rdx) # 1024-byte Folded Spill
; AVX512-NEXT: movl $64, %r8d
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %si, {{[0-9]+}}(%rsp)
; AVX512-NEXT: # implicit-def: $al
; AVX512-NEXT: movb %al, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: # implicit-def: $dl
; AVX512-NEXT: movb %dl, {{[0-9]+}}(%rsp)
; AVX512-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; AVX512-NEXT: ldtilecfg {{[0-9]+}}(%rsp)
; AVX512-NEXT: tileloadd (%rdi,%r8), %tmm2
; AVX512-NEXT: movl $64, %edi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: tileloadd (%rdx,%rdi), %tmm0
; AVX512-NEXT: movw $8, %dx
; AVX512-NEXT: movabsq $64, %rdi
; AVX512-NEXT: tileloadd 1024(%rsp,%rdi), %tmm1 # 1024-byte Folded Reload
; AVX512-NEXT: tdpbssd %tmm2, %tmm1, %tmm0
; AVX512-NEXT: movl $64, %esi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi)
; AVX512-NEXT: movl $64, %esi
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: tileloadd (%rdx,%rsi), %tmm0
; AVX512-NEXT: movl $buf, %edx
; AVX512-NEXT: movl $32, %esi
; AVX512-NEXT: tilestored %tmm0, (%rdx,%rsi)
; AVX512-NEXT: movq %rbp, %rsp
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: tilerelease
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
entry:
; %cond == 0 branches to if.else; any non-zero value branches to if.then.
%tobool.not = icmp eq i32 %cond, 0
br i1 %tobool.not, label %if.else, label %if.then
if.then: ; preds = %entry
; Three tile loads from @buf with a trailing i64 operand of 32 and leading
; i16 operand pairs (%row, 8), (8, %col) and (%row, %col).
; NOTE(review): presumably the i16 pair is the tile shape and the i64 the
; row stride -- confirm against the intrinsic definition.
%0 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf, i64 32)
%1 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf, i64 32)
%2 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf, i64 32)
br label %if.end
if.else: ; preds = %entry
; Same three loads as if.then, but reading from @buf2 instead of @buf.
%3 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 8, ptr @buf2, i64 32)
%4 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 8, i16 %col, ptr @buf2, i64 32)
%5 = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 %row, i16 %col, ptr @buf2, i64 32)
br label %if.end
if.end: ; preds = %if.else, %if.then
; Merge the x86_amx values from both arms: a <- (%row, 8) tile,
; b <- (8, %col) tile, c <- (%row, %col) tile.
%a.sroa.1094.0.in = phi x86_amx [ %3, %if.else ], [ %0, %if.then ]
%b.sroa.1069.0.in = phi x86_amx [ %4, %if.else ], [ %1, %if.then ]
%c.sroa.1044.0.in = phi x86_amx [ %5, %if.else ], [ %2, %if.then ]
; tdpbssd takes the tiles in the order c, a, b; its result is stored to @buf
; (never @buf2) with the same trailing operand of 32 used by the loads.
%6 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col, i16 8, x86_amx %c.sroa.1044.0.in, x86_amx %a.sroa.1094.0.in, x86_amx %b.sroa.1069.0.in)
tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, ptr @buf, i64 32, x86_amx %6)
ret void
}
; Declarations of the AMX internal intrinsics called by @test_api: tile load,
; tdpbssd, and tile store. Tiles are passed and returned as x86_amx values.
declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64)
declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)