[GISel] Fix ShuffleVector assert (#139769)
Fixes issue: https://github.com/llvm/llvm-project/issues/139752 When G_SHUFFLE_VECTOR has only 1 element then it is possible the vector is decayed into a scalar.
This commit is contained in:
@@ -420,8 +420,11 @@ void CombinerHelper::applyCombineShuffleToBuildVector(MachineInstr &MI) const {
|
||||
else
|
||||
Extracts.push_back(Unmerge2.getReg(Val - Width));
|
||||
}
|
||||
|
||||
Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
|
||||
assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
|
||||
if (Extracts.size() == 1)
|
||||
Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
|
||||
else
|
||||
Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
|
||||
|
||||
; Description: an end-to-end IR test for https://github.com/llvm/llvm-project/issues/139752
|
||||
; To test combine shuffle_vectors into build_vector
|
||||
|
||||
define amdgpu_gs <4 x float> @_amdgpu_gs_main() {
|
||||
; CHECK-LABEL: _amdgpu_gs_main:
|
||||
; CHECK: ; %bb.0: ; %bb
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 16
|
||||
; CHECK-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
|
||||
; CHECK-NEXT: s_mov_b32 s0, 0
|
||||
; CHECK-NEXT: s_mov_b32 s1, s0
|
||||
; CHECK-NEXT: s_mov_b32 s2, s0
|
||||
; CHECK-NEXT: s_mov_b32 s3, s0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v2, s[0:3], 0 idxen
|
||||
; CHECK-NEXT: s_nop 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, v1
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
bb:
|
||||
%i = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 16), align 4
|
||||
%i1 = load <1 x float>, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr addrspace(3) null, i32 20), align 4
|
||||
%i2 = shufflevector <1 x float> %i, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
|
||||
call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %i2, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
|
||||
%i3 = shufflevector <1 x float> %i1, <1 x float> zeroinitializer, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
|
||||
ret <4 x float> %i3
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
|
||||
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #0
|
||||
|
||||
|
||||
@@ -20,10 +20,10 @@ body: |
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(p3) = COPY $vgpr1
|
||||
%12:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, 6, 7)
|
||||
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
|
||||
%2:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, 6, 7)
|
||||
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
|
||||
SI_RETURN
|
||||
...
|
||||
|
||||
@@ -46,10 +46,10 @@ body: |
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(p3) = COPY $vgpr1
|
||||
%12:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%11:_(<2 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(3, 4)
|
||||
G_STORE %11(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
|
||||
%2:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%4:_(<2 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(3, 4)
|
||||
G_STORE %4(<2 x s16>), %1(p3) :: (store (<2 x s16>), addrspace 3)
|
||||
SI_RETURN
|
||||
|
||||
...
|
||||
@@ -73,10 +73,10 @@ body: |
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(p3) = COPY $vgpr1
|
||||
%12:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%11:_(<3 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(0, 1, 2)
|
||||
G_STORE %11(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
|
||||
%2:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%4:_(<3 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(0, 1, 2)
|
||||
G_STORE %4(<3 x s16>), %1(p3) :: (store (<3 x s16>), addrspace 3)
|
||||
SI_RETURN
|
||||
...
|
||||
|
||||
@@ -101,10 +101,10 @@ body: |
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(p3) = COPY $vgpr1
|
||||
%12:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(4, 5, -1, 7)
|
||||
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
|
||||
%2:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4, 5, -1, 7)
|
||||
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
|
||||
SI_RETURN
|
||||
...
|
||||
|
||||
@@ -128,10 +128,34 @@ body: |
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(p3) = COPY $vgpr1
|
||||
%12:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%10:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%11:_(<4 x s16>) = G_SHUFFLE_VECTOR %10(<8 x s16>), %12, shufflemask(6, 7, 8, 9)
|
||||
G_STORE %11(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
|
||||
%2:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%4:_(<4 x s16>) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(6, 7, 8, 9)
|
||||
G_STORE %4(<4 x s16>), %1(p3) :: (store (<4 x s16>), addrspace 3)
|
||||
SI_RETURN
|
||||
...
|
||||
|
||||
|
||||
---
|
||||
name: shuffle_vector_to_copy
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
; CHECK-LABEL: name: shuffle_vector_to_copy
|
||||
; CHECK: liveins: $vgpr0, $vgpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
|
||||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1
|
||||
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>)
|
||||
; CHECK-NEXT: G_STORE [[UV4]](s16), [[COPY1]](p3) :: (store (s16), addrspace 3)
|
||||
; CHECK-NEXT: SI_RETURN
|
||||
%0:_(p3) = COPY $vgpr0
|
||||
%1:_(p3) = COPY $vgpr1
|
||||
%2:_(<8 x s16>) = G_IMPLICIT_DEF
|
||||
%3:_(<8 x s16>) = G_LOAD %0(p3) :: (load (<8 x s16>), align 8, addrspace 3)
|
||||
%4:_(s16) = G_SHUFFLE_VECTOR %3(<8 x s16>), %2, shufflemask(4)
|
||||
G_STORE %4(s16), %1(p3) :: (store (s16), addrspace 3)
|
||||
SI_RETURN
|
||||
...
|
||||
|
||||
Reference in New Issue
Block a user