Reland of the original patch, with an additional commit that fixes two issues: 1. Attempting to bitcast using MVTs that have no corresponding LLVM type. getDWordFromOffset now works directly on the original vector to extract the elements that correspond to the given DWordOffset. 2. Improper bit tracking in CalculateByteProvider for vector types with certain ops. Previously, bit tracking for certain ops (e.g. ISD::TRUNCATE) assumed the operands were scalar types, which is incorrect because these ops have different semantics for vector and scalar operands. CalculateByteProvider / CalculateSrcByte now bail out on vector types; handling them is left as a TODO.
398 lines
15 KiB
LLVM
398 lines
15 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; Byte merge: result = (load << 8) | (%arg1 & 0xff).
; Byte 0 of the result is byte 0 of %arg1; bytes 1-3 are bytes 0-2 of the
; loaded dword. The assertions expect the whole shl/and/or chain to be selected
; as a single v_perm_b32 with selector constant 0x6050400.
define amdgpu_kernel void @lsh8_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x6050400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = and i32 %arg1, 255 ; 0x000000ff
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, ptr addrspace(1) %gep, align 4
  ret void
}

; Byte merge: result = (load >> 24) | (%arg1 & 0xffffff00).
; Byte 0 of the result is byte 3 of the loaded dword; bytes 1-3 are taken
; unchanged from %arg1. The assertions expect a single v_perm_b32 with selector
; constant 0x7060503 (note the scalar operand comes first here).
define amdgpu_kernel void @lsr24_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsr24_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, s0, v2, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = lshr i32 %tmp, 24
  %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, ptr addrspace(1) %gep, align 4
  ret void
}

; Byte merge followed by a sign-bit flip:
;   result = ((load & 0xffffff00) | (%arg1 >> 24)) ^ 0x80000000.
; Byte 0 comes from byte 3 of %arg1 and bytes 1-3 from the loaded dword. The
; assertions expect the and/lshr/or part to become one v_perm_b32 (selector
; 0x7060503) and the xor to remain a separate v_xor_b32 after it.
define amdgpu_kernel void @and_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  %tmp5 = xor i32 %tmp4, -2147483648 ; 0x80000000
  store i32 %tmp5, ptr addrspace(1) %gep, align 4
  ret void
}

; Interleaving byte merge: result = (load & 0xff00ff00) | (%arg1 & 0x00ff00ff).
; Bytes 1 and 3 come from the loaded dword, bytes 0 and 2 from %arg1. The
; assertions expect a single v_perm_b32 with selector constant 0x7020500.
define amdgpu_kernel void @and_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x7020500
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = and i32 %tmp, -16711936 ; 0xff00ff00
  %tmp3 = and i32 %arg1, 16711935 ; 0x00ff00ff
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, ptr addrspace(1) %gep, align 4
  ret void
}

; FIXME: produce v_alignbit_b32 v2, v2, s0, 24 instead of v_perm
; Funnel-shift shaped merge: result = (load << 8) | (%arg1 >> 24).
; Byte 0 is byte 3 of %arg1; bytes 1-3 are bytes 0-2 of the loaded dword. The
; assertions currently pin a v_perm_b32 with selector constant 0x2010007.
define amdgpu_kernel void @lsh8_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh8_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x2010007
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, s0, v2, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, ptr addrspace(1) %gep, align 4
  ret void
}

; Merge with a zero byte: result = (load << 16) | (%arg1 >> 24).
; Byte 0 is byte 3 of %arg1, byte 1 is always zero, and bytes 2-3 are bytes
; 0-1 of the loaded dword. The assertions expect a single v_perm_b32 with
; selector constant 0x5040c03.
define amdgpu_kernel void @lsh16_or_lsr24(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: lsh16_or_lsr24:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x5040c03
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, ptr addrspace(1) %gep, align 4
  ret void
}

; Byte merge expressed with xor instead of or:
;   result = (load & 0xff0000ff) ^ (%arg1 & 0x00ffff00).
; The two masks select disjoint bytes, so bytes 0 and 3 come from the loaded
; dword and bytes 1-2 from %arg1. The assertions expect a single v_perm_b32
; with selector constant 0x7020104.
define amdgpu_kernel void @and_xor_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_xor_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = and i32 %tmp, -16776961 ; 0xff0000ff
  %tmp3 = and i32 %arg1, 16776960 ; 0x00ffff00
  %tmp4 = xor i32 %tmp2, %tmp3
  store i32 %tmp4, ptr addrspace(1) %gep, align 4
  ret void
}

; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
; Byte merge with constant-ones bytes:
;   result = ((%arg1 & 0xffffff00) | 0xffff0000) | (load & 0x00ff00ff).
; Byte 0 comes from the loaded dword, byte 1 from %arg1, and bytes 2-3 are
; forced to 0xff. No v_perm_b32 is formed today; the assertions pin the
; current scalar and/or plus vector and/or sequence.
define amdgpu_kernel void @and_or_or_and(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_or_and:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_and_b32 s0, s0, 0xff00
; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2
; GCN-NEXT: v_or_b32_e32 v2, s0, v2
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %and = and i32 %tmp, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, -65536 ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, ptr addrspace(1) %gep, align 4
  ret void
}

; Merge followed by a byte mask:
;   result = ((load << 16) | (%arg1 & 0xffff)) & 0xff0000ff.
; Byte 0 is byte 0 of %arg1, bytes 1-2 are zero, and byte 3 is byte 1 of the
; loaded dword. The assertions expect the trailing and to be absorbed into a
; single v_perm_b32 with selector constant 0x50c0c00.
define amdgpu_kernel void @and_or_and_shl(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: and_or_and_shl:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x50c0c00
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = and i32 %arg1, 65535 ; 0x0000ffff
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335 ; 0xff0000ff
  store i32 %and, ptr addrspace(1) %gep, align 4
  ret void
}

; Byte merge written as and-of-ors:
;   result = (load | 0x00ffff00) & (%arg1 | 0xff0000ff).
; Each or forces the bytes it does not contribute to all-ones, so the and keeps
; bytes 0 and 3 of the loaded dword and bytes 1-2 of %arg1. The assertions
; expect a single v_perm_b32 with selector constant 0x7020104.
define amdgpu_kernel void @or_and_or(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: or_and_or:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, s0, v3
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %or1 = or i32 %tmp, 16776960 ; 0x00ffff00
  %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
  %and = and i32 %or1, %or2
  store i32 %and, ptr addrspace(1) %gep, align 4
  ret void
}

; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
; Known-bits variant of the ones-filled byte merge. %arg1 has bit 15 set and
; the loaded dword has bit 2 set before they are combined as
;   %tmp3 = ((%mask1 & 0xffffff00) | 0xffff0000) | (%mask2 & 0x00ff00ff).
; The second store writes %tmp3 & 0xffff8004; every bit of that mask is set by
; one of the or instructions, so the assertions expect the literal 0xffff8004
; to be stored to %arg. The first store is still selected as plain and/or
; instructions rather than a permute.
define amdgpu_kernel void @known_ffff0500(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff0500:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v4, v[0:1]
; GCN-NEXT: s_bitset1_b32 s0, 15
; GCN-NEXT: s_and_b32 s0, s0, 0xff00
; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_or_b32_e32 v4, 4, v4
; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4
; GCN-NEXT: v_or_b32_e32 v4, s0, v4
; GCN-NEXT: flat_store_dword v[0:1], v4
; GCN-NEXT: flat_store_dword v[2:3], v5
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %load = load i32, ptr addrspace(1) %gep, align 4
  %mask1 = or i32 %arg1, 32768 ; 0x8000
  %mask2 = or i32 %load, 4
  %and = and i32 %mask2, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, ptr addrspace(1) %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, ptr addrspace(1) %arg, align 4
  ret void
}

; Known-bits variant of and_or_and_shl. %arg1 has bit 2 set before the merge
;   %and = ((load << 16) | (%mask & 0xffff)) & 0xff0000ff.
; The first store is expected to be a v_perm_b32 with selector constant
; 0x50c0c00. The second store writes %and & 0x00ffff04: bytes 1-2 of %and are
; zero, byte 3 is masked off, and bit 2 is known set, so the assertions expect
; the literal 4 to be stored to %arg.
define amdgpu_kernel void @known_050c0c00(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_050c0c00:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v5, 0x50c0c00
; GCN-NEXT: v_mov_b32_e32 v6, 4
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v4, v[0:1]
; GCN-NEXT: s_or_b32 s0, s0, 4
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
; GCN-NEXT: flat_store_dword v[0:1], v4
; GCN-NEXT: flat_store_dword v[2:3], v6
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %tmp = load i32, ptr addrspace(1) %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %mask = or i32 %arg1, 4
  %tmp3 = and i32 %mask, 65535 ; 0x0000ffff
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335 ; 0xff0000ff
  store i32 %and, ptr addrspace(1) %gep, align 4
  %v = and i32 %and, 16776964 ; 0x00ffff04
  store i32 %v, ptr addrspace(1) %arg, align 4
  ret void
}

; Known-bits variant of the ones-filled byte merge with the operand roles
; swapped relative to known_ffff0500: here %arg1 has bit 2 set and the loaded
; dword has bit 15 set, and
;   %tmp3 = ((%mask2 & 0xffffff00) | 0xffff0000) | (%mask1 & 0x00ff00ff).
; The first store is expected to be a v_perm_b32 with selector constant
; 0xffff0500. The second store writes %tmp3 & 0xffff8004, all of whose bits are
; known set, so the assertions expect the literal 0xffff8004 to be stored to
; %arg.
define amdgpu_kernel void @known_ffff8004(ptr addrspace(1) nocapture %arg, i32 %arg1) {
; GCN-LABEL: known_ffff8004:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500
; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GCN-NEXT: flat_load_dword v4, v[0:1]
; GCN-NEXT: s_or_b32 s0, s0, 4
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_or_b32_e32 v4, 0x8000, v4
; GCN-NEXT: v_perm_b32 v4, v4, s0, v5
; GCN-NEXT: flat_store_dword v[0:1], v4
; GCN-NEXT: flat_store_dword v[2:3], v6
; GCN-NEXT: s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, ptr addrspace(1) %arg, i32 %id
  %load = load i32, ptr addrspace(1) %gep, align 4
  %mask1 = or i32 %arg1, 4
  %mask2 = or i32 %load, 32768 ; 0x8000
  %and = and i32 %mask1, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, ptr addrspace(1) %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, ptr addrspace(1) %arg, align 4
  ret void
}

; Intrinsic called at the top of every kernel in this file; its i32 result is
; used as the getelementptr index into %arg.
declare i32 @llvm.amdgcn.workitem.id.x()