Files
clang-p2996/llvm/test/CodeGen/AMDGPU/GlobalISel/shufflevector.ll
Alan Li 2795abb2f8 [GISel][AMDGPU] Expand ShuffleVector (#124527)
This patch dismantles G_SHUFFLE_VECTOR before lowering. The original
lowering would emit extract vector element ops. We found that by using
unmerged values the build vector op combine could find ways to fold.

Only enabled on AMDGPU.

This resolves #123631
2025-04-09 17:51:24 -07:00

19 lines
852 B
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-amd-hmcsa -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX942 %s
; Reads an <8 x half> vector through %in, keeps lanes 4-7 with a shufflevector,
; and writes the resulting <4 x half> through %out. Both pointers are in
; addrspace(3).
;
; The autogenerated assertions below expect a single ds_read2_b64 into v[2:5]
; followed by a ds_write_b64 of v[4:5] only, i.e. no per-element extraction.
;
; NOTE(review): the triple on this file's llc command line ends in "hmcsa",
; which looks like a typo for "amdhsa" -- confirm. Changing it may change the
; generated code, so regenerate the assertions if it is ever corrected.
define void @shuffle_to_extract(ptr addrspace(3) %in, ptr addrspace(3) %out) {
; GFX942-LABEL: shuffle_to_extract:
; GFX942:       ; %bb.0:
; GFX942-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT:    ds_read2_b64 v[2:5], v0 offset1:1
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    ds_write_b64 v1, v[4:5]
; GFX942-NEXT:    s_waitcnt lgkmcnt(0)
; GFX942-NEXT:    s_setpc_b64 s[30:31]
  %wide = load <8 x half>, ptr addrspace(3) %in, align 8
  ; Mask <4, 5, 6, 7> selects the upper four lanes of %wide; every index is
  ; below 8, so the poison second operand is never read.
  %upper = shufflevector <8 x half> %wide, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  store <4 x half> %upper, ptr addrspace(3) %out, align 8
  ret void
}