Files
clang-p2996/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
Carl Ritson 4a21e3afa2 [LiveIntervals] repairIntervalsInRange: recompute width changes (#78564)
Extend repairIntervalsInRange to completely recompute the interva for a
register if subregister defs exist without precise subrange matches
(LaneMask exactly matching subregister).
This occurs when register sequences are lowered to copies such that the
size of the copies do not match any uses of the subregisters formed
(i.e. during twoaddressinstruction).

The subranges without this change are probably legal, but do not match
those generated by live interval computation. This creates problems with
other code that assumes subranges precisely cover all subregisters
defined, e.g. shrinkToUses().
2024-03-11 15:24:17 +09:00

258 lines
11 KiB
LLVM

; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,SPLIT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,SPLIT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,SPLIT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,VECT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED,VECT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,ALIGNED,VECT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -mattr=+cumode < %s | FileCheck -check-prefixes=GCN,ALIGNED,VECT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -mattr=+cumode -early-live-intervals < %s | FileCheck -check-prefixes=GCN,ALIGNED,VECT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -mattr=+cumode,+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,UNALIGNED,VECT %s
; GCN-LABEL: test_local_misaligned_v2:
; GCN-DAG: ds_{{read2|load_2addr}}_b32
; GCN-DAG: ds_{{write2|store_2addr}}_b32
define amdgpu_kernel void @test_local_misaligned_v2(ptr addrspace(3) %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid
%load = load <2 x i32>, ptr addrspace(3) %gep, align 4
%v1 = extractelement <2 x i32> %load, i32 0
%v2 = extractelement <2 x i32> %load, i32 1
%v3 = insertelement <2 x i32> undef, i32 %v2, i32 0
%v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1
store <2 x i32> %v4, ptr addrspace(3) %gep, align 4
ret void
}
; GCN-LABEL: test_local_misaligned_v4:
; GCN-DAG: ds_{{read2|load_2addr}}_b32
; GCN-DAG: ds_{{read2|load_2addr}}_b32
; GCN-DAG: ds_{{write2|store_2addr}}_b32
; GCN-DAG: ds_{{write2|store_2addr}}_b32
define amdgpu_kernel void @test_local_misaligned_v4(ptr addrspace(3) %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid
%load = load <4 x i32>, ptr addrspace(3) %gep, align 4
%v1 = extractelement <4 x i32> %load, i32 0
%v2 = extractelement <4 x i32> %load, i32 1
%v3 = extractelement <4 x i32> %load, i32 2
%v4 = extractelement <4 x i32> %load, i32 3
%v5 = insertelement <4 x i32> undef, i32 %v4, i32 0
%v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1
%v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2
%v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3
store <4 x i32> %v8, ptr addrspace(3) %gep, align 4
ret void
}
; GCN-LABEL: test_local_misaligned_v3:
; GCN-DAG: ds_{{read2|load_2addr}}_b32
; GCN-DAG: ds_{{read|load}}_b32
; GCN-DAG: ds_{{write2|store_2addr}}_b32
; GCN-DAG: ds_{{write|store}}_b32
define amdgpu_kernel void @test_local_misaligned_v3(ptr addrspace(3) %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid
%load = load <3 x i32>, ptr addrspace(3) %gep, align 4
%v1 = extractelement <3 x i32> %load, i32 0
%v2 = extractelement <3 x i32> %load, i32 1
%v3 = extractelement <3 x i32> %load, i32 2
%v5 = insertelement <3 x i32> undef, i32 %v3, i32 0
%v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1
%v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2
store <3 x i32> %v7, ptr addrspace(3) %gep, align 4
ret void
}
; GCN-LABEL: test_flat_misaligned_v2:
; VECT-DAG: flat_load_{{dwordx2|b64}} v
; VECT-DAG: flat_store_{{dwordx2|b64}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
define amdgpu_kernel void @test_flat_misaligned_v2(ptr %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr %arg, i32 %lid
%load = load <2 x i32>, ptr %gep, align 4
%v1 = extractelement <2 x i32> %load, i32 0
%v2 = extractelement <2 x i32> %load, i32 1
%v3 = insertelement <2 x i32> undef, i32 %v2, i32 0
%v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1
store <2 x i32> %v4, ptr %gep, align 4
ret void
}
; GCN-LABEL: test_flat_misaligned_v4:
; VECT-DAG: flat_load_{{dwordx4|b128}} v
; VECT-DAG: flat_store_{{dwordx4|b128}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
define amdgpu_kernel void @test_flat_misaligned_v4(ptr %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr %arg, i32 %lid
%load = load <4 x i32>, ptr %gep, align 4
%v1 = extractelement <4 x i32> %load, i32 0
%v2 = extractelement <4 x i32> %load, i32 1
%v3 = extractelement <4 x i32> %load, i32 2
%v4 = extractelement <4 x i32> %load, i32 3
%v5 = insertelement <4 x i32> undef, i32 %v4, i32 0
%v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1
%v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2
%v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3
store <4 x i32> %v8, ptr %gep, align 4
ret void
}
; GCN-LABEL: test_flat_misaligned_v3:
; VECT-DAG: flat_load_{{dwordx3|b96}} v
; VECT-DAG: flat_store_{{dwordx3|b96}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_load_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
; SPLIT-DAG: flat_store_{{dword|b32}} v
define amdgpu_kernel void @test_flat_misaligned_v3(ptr %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr %arg, i32 %lid
%load = load <3 x i32>, ptr %gep, align 4
%v1 = extractelement <3 x i32> %load, i32 0
%v2 = extractelement <3 x i32> %load, i32 1
%v3 = extractelement <3 x i32> %load, i32 2
%v5 = insertelement <3 x i32> undef, i32 %v3, i32 0
%v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1
%v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2
store <3 x i32> %v7, ptr %gep, align 4
ret void
}
; GCN-LABEL: test_local_aligned_v2:
; GCN-DAG: ds_{{read|load}}_b64
; GCN-DAG: ds_{{write|store}}_b64
define amdgpu_kernel void @test_local_aligned_v2(ptr addrspace(3) %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid
%load = load <2 x i32>, ptr addrspace(3) %gep, align 8
%v1 = extractelement <2 x i32> %load, i32 0
%v2 = extractelement <2 x i32> %load, i32 1
%v3 = insertelement <2 x i32> undef, i32 %v2, i32 0
%v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1
store <2 x i32> %v4, ptr addrspace(3) %gep, align 8
ret void
}
; GCN-LABEL: test_local_aligned_v3:
; GCN-DAG: ds_{{read|load}}_b96
; GCN-DAG: ds_{{write|store}}_b96
define amdgpu_kernel void @test_local_aligned_v3(ptr addrspace(3) %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid
%load = load <3 x i32>, ptr addrspace(3) %gep, align 16
%v1 = extractelement <3 x i32> %load, i32 0
%v2 = extractelement <3 x i32> %load, i32 1
%v3 = extractelement <3 x i32> %load, i32 2
%v5 = insertelement <3 x i32> undef, i32 %v3, i32 0
%v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1
%v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2
store <3 x i32> %v7, ptr addrspace(3) %gep, align 16
ret void
}
; GCN-LABEL: test_flat_aligned_v2:
; GCN-DAG: flat_load_{{dwordx2|b64}} v
; GCN-DAG: flat_store_{{dwordx2|b64}} v
define amdgpu_kernel void @test_flat_aligned_v2(ptr %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr %arg, i32 %lid
%load = load <2 x i32>, ptr %gep, align 8
%v1 = extractelement <2 x i32> %load, i32 0
%v2 = extractelement <2 x i32> %load, i32 1
%v3 = insertelement <2 x i32> undef, i32 %v2, i32 0
%v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1
store <2 x i32> %v4, ptr %gep, align 8
ret void
}
; GCN-LABEL: test_flat_aligned_v4:
; GCN-DAG: flat_load_{{dwordx4|b128}} v
; GCN-DAG: flat_store_{{dwordx4|b128}} v
define amdgpu_kernel void @test_flat_aligned_v4(ptr %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr %arg, i32 %lid
%load = load <4 x i32>, ptr %gep, align 16
%v1 = extractelement <4 x i32> %load, i32 0
%v2 = extractelement <4 x i32> %load, i32 1
%v3 = extractelement <4 x i32> %load, i32 2
%v4 = extractelement <4 x i32> %load, i32 3
%v5 = insertelement <4 x i32> undef, i32 %v4, i32 0
%v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1
%v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2
%v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3
store <4 x i32> %v8, ptr %gep, align 16
ret void
}
; GCN-LABEL: test_local_v4_aligned8:
; ALIGNED-DAG: ds_{{read2|load_2addr}}_b64
; ALIGNED-DAG: ds_{{write2|store_2addr}}_b64
; UNALIGNED-DAG: ds_{{read2|load_2addr}}_b64
; UNALIGNED-DAG: ds_{{write2|store_2addr}}_b64
define amdgpu_kernel void @test_local_v4_aligned8(ptr addrspace(3) %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid
%load = load <4 x i32>, ptr addrspace(3) %gep, align 8
%v1 = extractelement <4 x i32> %load, i32 0
%v2 = extractelement <4 x i32> %load, i32 1
%v3 = extractelement <4 x i32> %load, i32 2
%v4 = extractelement <4 x i32> %load, i32 3
%v5 = insertelement <4 x i32> undef, i32 %v4, i32 0
%v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1
%v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2
%v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3
store <4 x i32> %v8, ptr addrspace(3) %gep, align 8
ret void
}
; GCN-LABEL: test_flat_v4_aligned8:
; VECT-DAG: flat_load_{{dwordx4|b128}} v
; VECT-DAG: flat_store_{{dwordx4|b128}} v
; SPLIT-DAG: flat_load_{{dwordx2|b64}} v
; SPLIT-DAG: flat_load_{{dwordx2|b64}} v
; SPLIT-DAG: flat_store_{{dwordx2|b64}} v
; SPLIT-DAG: flat_store_{{dwordx2|b64}} v
define amdgpu_kernel void @test_flat_v4_aligned8(ptr %arg) {
bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, ptr %arg, i32 %lid
%load = load <4 x i32>, ptr %gep, align 8
%v1 = extractelement <4 x i32> %load, i32 0
%v2 = extractelement <4 x i32> %load, i32 1
%v3 = extractelement <4 x i32> %load, i32 2
%v4 = extractelement <4 x i32> %load, i32 3
%v5 = insertelement <4 x i32> undef, i32 %v4, i32 0
%v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1
%v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2
%v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3
store <4 x i32> %v8, ptr %gep, align 8
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()