; This patch adjusts the cost model to account for the ability of the AMDGPU
; optimizer to group together i8 values into i32 values.
; Co-authored-by: Erich Keane <ekeane@nvidia.com>
; (source listing metadata: 872 lines, 55 KiB, LLVM)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer %s | FileCheck -check-prefix=GFX9 %s

; arith_2 -- scalar i8 arithmetic on lanes 0 and 1 of %invec. Each extracted
; element is multiplied by 1, incremented by 1, and inserted into the same
; lane of a poison <16 x i8>, which is then stored to %out. %flag is accepted
; but never read by the body.
; Expected output per the assertions below -- the hawaii run keeps the scalar
; chain, while the fiji and gfx900 runs fold both lanes into one <2 x i8>
; mul/add pair bracketed by shufflevector instructions.
define protected amdgpu_kernel void @arith_2(<16 x i8> %invec, ptr %out, i32 %flag) {
; GFX7-LABEL: define protected amdgpu_kernel void @arith_2(
; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX7-NEXT: [[ENTRY:.*:]]
; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1
; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1
; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
; GFX7-NEXT: ret void
;
; GFX8-LABEL: define protected amdgpu_kernel void @arith_2(
; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX8-NEXT: [[ENTRY:.*:]]
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[TMP0]], splat (i8 1)
; GFX8-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 1)
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
; GFX8-NEXT: ret void
;
; GFX9-LABEL: define protected amdgpu_kernel void @arith_2(
; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX9-NEXT: [[ENTRY:.*:]]
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <2 x i32> <i32 0, i32 1>
; GFX9-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[TMP0]], splat (i8 1)
; GFX9-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 1)
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
; GFX9-NEXT: ret void
;
entry:
  %el0 = extractelement <16 x i8> %invec, i64 0
  %el1 = extractelement <16 x i8> %invec, i64 1
  %mul0 = mul i8 %el0, 1
  %mul1 = mul i8 %el1, 1
  %add0 = add i8 %mul0, 1
  %add1 = add i8 %mul1, 1
  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
  store <16 x i8> %vecins1, ptr %out
  ret void
}

; arith_3 -- same mul-by-1 / add-1 pattern as a two-lane case, but over lanes
; 0, 1 and 2 of %invec, so the lane count is not a power of two. The results
; are inserted into lanes 0..2 of a poison <16 x i8> and stored to %out.
; %flag is accepted but never read by the body.
; Expected output per the assertions below -- the hawaii run stays fully
; scalar. The fiji and gfx900 runs leave lane 0 scalar, process lanes 1 and 2
; as one <2 x i8> mul/add pair, and merge both parts with a shufflevector.
define protected amdgpu_kernel void @arith_3(<16 x i8> %invec, ptr %out, i32 %flag) {
; GFX7-LABEL: define protected amdgpu_kernel void @arith_3(
; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX7-NEXT: [[ENTRY:.*:]]
; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL0]], 1
; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL2]], 1
; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1
; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD2]], i64 0
; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
; GFX7-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD3]], i64 2
; GFX7-NEXT: store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16
; GFX7-NEXT: ret void
;
; GFX8-LABEL: define protected amdgpu_kernel void @arith_3(
; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX8-NEXT: [[ENTRY:.*:]]
; GFX8-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
; GFX8-NEXT: [[MUL3:%.*]] = mul i8 [[EL0]], 1
; GFX8-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <2 x i32> <i32 1, i32 2>
; GFX8-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[TMP0]], splat (i8 1)
; GFX8-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 1)
; GFX8-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD3]], i64 0
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: [[VECINS2:%.*]] = shufflevector <16 x i8> [[VECINS0]], <16 x i8> [[TMP3]], <16 x i32> <i32 0, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16
; GFX8-NEXT: ret void
;
; GFX9-LABEL: define protected amdgpu_kernel void @arith_3(
; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[ENTRY:.*:]]
; GFX9-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
; GFX9-NEXT: [[MUL3:%.*]] = mul i8 [[EL0]], 1
; GFX9-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <2 x i32> <i32 1, i32 2>
; GFX9-NEXT: [[TMP1:%.*]] = mul <2 x i8> [[TMP0]], splat (i8 1)
; GFX9-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 1)
; GFX9-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD3]], i64 0
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[VECINS2:%.*]] = shufflevector <16 x i8> [[VECINS0]], <16 x i8> [[TMP3]], <16 x i32> <i32 0, i32 16, i32 17, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: store <16 x i8> [[VECINS2]], ptr [[OUT]], align 16
; GFX9-NEXT: ret void
;
entry:
  %el0 = extractelement <16 x i8> %invec, i64 0
  %el1 = extractelement <16 x i8> %invec, i64 1
  %el2 = extractelement <16 x i8> %invec, i64 2
  %mul0 = mul i8 %el0, 1
  %mul1 = mul i8 %el1, 1
  %mul2 = mul i8 %el2, 1
  %add0 = add i8 %mul0, 1
  %add1 = add i8 %mul1, 1
  %add2 = add i8 %mul2, 1
  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
  %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2
  store <16 x i8> %vecins2, ptr %out
  ret void
}

; arith_4 -- mul-by-1 / add-1 over lanes 0..3 of %invec, i.e. four i8 values
; that together occupy 32 bits. The results are inserted into lanes 0..3 of a
; poison <16 x i8> and stored to %out. %flag is accepted but never read.
; Expected output per the assertions below -- the hawaii run stays fully
; scalar, while the fiji and gfx900 runs use a single <4 x i8> mul/add pair
; bracketed by shufflevector instructions.
define protected amdgpu_kernel void @arith_4(<16 x i8> %invec, ptr %out, i32 %flag) {
; GFX7-LABEL: define protected amdgpu_kernel void @arith_4(
; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX7-NEXT: [[ENTRY:.*:]]
; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
; GFX7-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1
; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1
; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1
; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1
; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1
; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
; GFX7-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
; GFX7-NEXT: ret void
;
; GFX8-LABEL: define protected amdgpu_kernel void @arith_4(
; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX8-NEXT: [[ENTRY:.*:]]
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], splat (i8 1)
; GFX8-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], splat (i8 1)
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
; GFX8-NEXT: ret void
;
; GFX9-LABEL: define protected amdgpu_kernel void @arith_4(
; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[ENTRY:.*:]]
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX9-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], splat (i8 1)
; GFX9-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], splat (i8 1)
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
; GFX9-NEXT: ret void
;
entry:
  %el0 = extractelement <16 x i8> %invec, i64 0
  %el1 = extractelement <16 x i8> %invec, i64 1
  %el2 = extractelement <16 x i8> %invec, i64 2
  %el3 = extractelement <16 x i8> %invec, i64 3
  %mul0 = mul i8 %el0, 1
  %mul1 = mul i8 %el1, 1
  %mul2 = mul i8 %el2, 1
  %mul3 = mul i8 %el3, 1
  %add0 = add i8 %mul0, 1
  %add1 = add i8 %mul1, 1
  %add2 = add i8 %mul2, 1
  %add3 = add i8 %mul3, 1
  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
  %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2
  %vecins3 = insertelement <16 x i8> %vecins2, i8 %add3, i64 3
  store <16 x i8> %vecins3, ptr %out
  ret void
}

; arith_16 -- mul-by-1 / add-1 over every lane (0..15) of %invec. The results
; are re-inserted lane by lane into a poison <16 x i8> that is stored to %out.
; %flag is accepted but never read by the body.
; Expected output per the assertions below -- the hawaii run stays fully
; scalar. The fiji and gfx900 runs split the work into four <4 x i8> groups
; (lanes 0-3, 4-7, 8-11, 12-15), each with its own mul/add pair, and rebuild
; the <16 x i8> result through a chain of shufflevector instructions.
define protected amdgpu_kernel void @arith_16(<16 x i8> %invec, ptr %out, i32 %flag) {
; GFX7-LABEL: define protected amdgpu_kernel void @arith_16(
; GFX7-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX7-NEXT: [[ENTRY:.*:]]
; GFX7-NEXT: [[EL0:%.*]] = extractelement <16 x i8> [[INVEC]], i64 0
; GFX7-NEXT: [[EL1:%.*]] = extractelement <16 x i8> [[INVEC]], i64 1
; GFX7-NEXT: [[EL2:%.*]] = extractelement <16 x i8> [[INVEC]], i64 2
; GFX7-NEXT: [[EL3:%.*]] = extractelement <16 x i8> [[INVEC]], i64 3
; GFX7-NEXT: [[EL4:%.*]] = extractelement <16 x i8> [[INVEC]], i64 4
; GFX7-NEXT: [[EL5:%.*]] = extractelement <16 x i8> [[INVEC]], i64 5
; GFX7-NEXT: [[EL6:%.*]] = extractelement <16 x i8> [[INVEC]], i64 6
; GFX7-NEXT: [[EL7:%.*]] = extractelement <16 x i8> [[INVEC]], i64 7
; GFX7-NEXT: [[EL8:%.*]] = extractelement <16 x i8> [[INVEC]], i64 8
; GFX7-NEXT: [[EL9:%.*]] = extractelement <16 x i8> [[INVEC]], i64 9
; GFX7-NEXT: [[EL10:%.*]] = extractelement <16 x i8> [[INVEC]], i64 10
; GFX7-NEXT: [[EL11:%.*]] = extractelement <16 x i8> [[INVEC]], i64 11
; GFX7-NEXT: [[EL12:%.*]] = extractelement <16 x i8> [[INVEC]], i64 12
; GFX7-NEXT: [[EL13:%.*]] = extractelement <16 x i8> [[INVEC]], i64 13
; GFX7-NEXT: [[EL14:%.*]] = extractelement <16 x i8> [[INVEC]], i64 14
; GFX7-NEXT: [[EL15:%.*]] = extractelement <16 x i8> [[INVEC]], i64 15
; GFX7-NEXT: [[MUL0:%.*]] = mul i8 [[EL0]], 1
; GFX7-NEXT: [[MUL1:%.*]] = mul i8 [[EL1]], 1
; GFX7-NEXT: [[MUL2:%.*]] = mul i8 [[EL2]], 1
; GFX7-NEXT: [[MUL3:%.*]] = mul i8 [[EL3]], 1
; GFX7-NEXT: [[MUL4:%.*]] = mul i8 [[EL4]], 1
; GFX7-NEXT: [[MUL5:%.*]] = mul i8 [[EL5]], 1
; GFX7-NEXT: [[MUL6:%.*]] = mul i8 [[EL6]], 1
; GFX7-NEXT: [[MUL7:%.*]] = mul i8 [[EL7]], 1
; GFX7-NEXT: [[MUL8:%.*]] = mul i8 [[EL8]], 1
; GFX7-NEXT: [[MUL9:%.*]] = mul i8 [[EL9]], 1
; GFX7-NEXT: [[MUL10:%.*]] = mul i8 [[EL10]], 1
; GFX7-NEXT: [[MUL11:%.*]] = mul i8 [[EL11]], 1
; GFX7-NEXT: [[MUL12:%.*]] = mul i8 [[EL12]], 1
; GFX7-NEXT: [[MUL13:%.*]] = mul i8 [[EL13]], 1
; GFX7-NEXT: [[MUL14:%.*]] = mul i8 [[EL14]], 1
; GFX7-NEXT: [[MUL15:%.*]] = mul i8 [[EL15]], 1
; GFX7-NEXT: [[ADD0:%.*]] = add i8 [[MUL0]], 1
; GFX7-NEXT: [[ADD1:%.*]] = add i8 [[MUL1]], 1
; GFX7-NEXT: [[ADD2:%.*]] = add i8 [[MUL2]], 1
; GFX7-NEXT: [[ADD3:%.*]] = add i8 [[MUL3]], 1
; GFX7-NEXT: [[ADD4:%.*]] = add i8 [[MUL4]], 1
; GFX7-NEXT: [[ADD5:%.*]] = add i8 [[MUL5]], 1
; GFX7-NEXT: [[ADD6:%.*]] = add i8 [[MUL6]], 1
; GFX7-NEXT: [[ADD7:%.*]] = add i8 [[MUL7]], 1
; GFX7-NEXT: [[ADD8:%.*]] = add i8 [[MUL8]], 1
; GFX7-NEXT: [[ADD9:%.*]] = add i8 [[MUL9]], 1
; GFX7-NEXT: [[ADD10:%.*]] = add i8 [[MUL10]], 1
; GFX7-NEXT: [[ADD11:%.*]] = add i8 [[MUL11]], 1
; GFX7-NEXT: [[ADD12:%.*]] = add i8 [[MUL12]], 1
; GFX7-NEXT: [[ADD13:%.*]] = add i8 [[MUL13]], 1
; GFX7-NEXT: [[ADD14:%.*]] = add i8 [[MUL14]], 1
; GFX7-NEXT: [[ADD15:%.*]] = add i8 [[MUL15]], 1
; GFX7-NEXT: [[VECINS0:%.*]] = insertelement <16 x i8> poison, i8 [[ADD0]], i64 0
; GFX7-NEXT: [[VECINS1:%.*]] = insertelement <16 x i8> [[VECINS0]], i8 [[ADD1]], i64 1
; GFX7-NEXT: [[VECINS2:%.*]] = insertelement <16 x i8> [[VECINS1]], i8 [[ADD2]], i64 2
; GFX7-NEXT: [[VECINS3:%.*]] = insertelement <16 x i8> [[VECINS2]], i8 [[ADD3]], i64 3
; GFX7-NEXT: [[VECINS4:%.*]] = insertelement <16 x i8> [[VECINS3]], i8 [[ADD4]], i64 4
; GFX7-NEXT: [[VECINS5:%.*]] = insertelement <16 x i8> [[VECINS4]], i8 [[ADD5]], i64 5
; GFX7-NEXT: [[VECINS6:%.*]] = insertelement <16 x i8> [[VECINS5]], i8 [[ADD6]], i64 6
; GFX7-NEXT: [[VECINS7:%.*]] = insertelement <16 x i8> [[VECINS6]], i8 [[ADD7]], i64 7
; GFX7-NEXT: [[VECINS8:%.*]] = insertelement <16 x i8> [[VECINS7]], i8 [[ADD8]], i64 8
; GFX7-NEXT: [[VECINS9:%.*]] = insertelement <16 x i8> [[VECINS8]], i8 [[ADD9]], i64 9
; GFX7-NEXT: [[VECINS10:%.*]] = insertelement <16 x i8> [[VECINS9]], i8 [[ADD10]], i64 10
; GFX7-NEXT: [[VECINS11:%.*]] = insertelement <16 x i8> [[VECINS10]], i8 [[ADD11]], i64 11
; GFX7-NEXT: [[VECINS12:%.*]] = insertelement <16 x i8> [[VECINS11]], i8 [[ADD12]], i64 12
; GFX7-NEXT: [[VECINS13:%.*]] = insertelement <16 x i8> [[VECINS12]], i8 [[ADD13]], i64 13
; GFX7-NEXT: [[VECINS14:%.*]] = insertelement <16 x i8> [[VECINS13]], i8 [[ADD14]], i64 14
; GFX7-NEXT: [[VECINS153:%.*]] = insertelement <16 x i8> [[VECINS14]], i8 [[ADD15]], i64 15
; GFX7-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16
; GFX7-NEXT: ret void
;
; GFX8-LABEL: define protected amdgpu_kernel void @arith_16(
; GFX8-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX8-NEXT: [[ENTRY:.*:]]
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], splat (i8 1)
; GFX8-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], splat (i8 1)
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; GFX8-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[TMP3]], splat (i8 1)
; GFX8-NEXT: [[TMP5:%.*]] = add <4 x i8> [[TMP4]], splat (i8 1)
; GFX8-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; GFX8-NEXT: [[TMP7:%.*]] = mul <4 x i8> [[TMP6]], splat (i8 1)
; GFX8-NEXT: [[TMP8:%.*]] = add <4 x i8> [[TMP7]], splat (i8 1)
; GFX8-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; GFX8-NEXT: [[TMP10:%.*]] = mul <4 x i8> [[TMP9]], splat (i8 1)
; GFX8-NEXT: [[TMP11:%.*]] = add <4 x i8> [[TMP10]], splat (i8 1)
; GFX8-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: [[VECINS71:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; GFX8-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: [[VECINS112:%.*]] = shufflevector <16 x i8> [[VECINS71]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; GFX8-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: [[VECINS153:%.*]] = shufflevector <16 x i8> [[VECINS112]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; GFX8-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16
; GFX8-NEXT: ret void
;
; GFX9-LABEL: define protected amdgpu_kernel void @arith_16(
; GFX9-SAME: <16 x i8> [[INVEC:%.*]], ptr [[OUT:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[ENTRY:.*:]]
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX9-NEXT: [[TMP1:%.*]] = mul <4 x i8> [[TMP0]], splat (i8 1)
; GFX9-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], splat (i8 1)
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; GFX9-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[TMP3]], splat (i8 1)
; GFX9-NEXT: [[TMP5:%.*]] = add <4 x i8> [[TMP4]], splat (i8 1)
; GFX9-NEXT: [[TMP6:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
; GFX9-NEXT: [[TMP7:%.*]] = mul <4 x i8> [[TMP6]], splat (i8 1)
; GFX9-NEXT: [[TMP8:%.*]] = add <4 x i8> [[TMP7]], splat (i8 1)
; GFX9-NEXT: [[TMP9:%.*]] = shufflevector <16 x i8> [[INVEC]], <16 x i8> poison, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
; GFX9-NEXT: [[TMP10:%.*]] = mul <4 x i8> [[TMP9]], splat (i8 1)
; GFX9-NEXT: [[TMP11:%.*]] = add <4 x i8> [[TMP10]], splat (i8 1)
; GFX9-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[VECINS71:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; GFX9-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP8]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[VECINS112:%.*]] = shufflevector <16 x i8> [[VECINS71]], <16 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
; GFX9-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[VECINS153:%.*]] = shufflevector <16 x i8> [[VECINS112]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; GFX9-NEXT: store <16 x i8> [[VECINS153]], ptr [[OUT]], align 16
; GFX9-NEXT: ret void
;
entry:
  %el0 = extractelement <16 x i8> %invec, i64 0
  %el1 = extractelement <16 x i8> %invec, i64 1
  %el2 = extractelement <16 x i8> %invec, i64 2
  %el3 = extractelement <16 x i8> %invec, i64 3
  %el4 = extractelement <16 x i8> %invec, i64 4
  %el5 = extractelement <16 x i8> %invec, i64 5
  %el6 = extractelement <16 x i8> %invec, i64 6
  %el7 = extractelement <16 x i8> %invec, i64 7
  %el8 = extractelement <16 x i8> %invec, i64 8
  %el9 = extractelement <16 x i8> %invec, i64 9
  %el10 = extractelement <16 x i8> %invec, i64 10
  %el11 = extractelement <16 x i8> %invec, i64 11
  %el12 = extractelement <16 x i8> %invec, i64 12
  %el13 = extractelement <16 x i8> %invec, i64 13
  %el14 = extractelement <16 x i8> %invec, i64 14
  %el15 = extractelement <16 x i8> %invec, i64 15
  %mul0 = mul i8 %el0, 1
  %mul1 = mul i8 %el1, 1
  %mul2 = mul i8 %el2, 1
  %mul3 = mul i8 %el3, 1
  %mul4 = mul i8 %el4, 1
  %mul5 = mul i8 %el5, 1
  %mul6 = mul i8 %el6, 1
  %mul7 = mul i8 %el7, 1
  %mul8 = mul i8 %el8, 1
  %mul9 = mul i8 %el9, 1
  %mul10 = mul i8 %el10, 1
  %mul11 = mul i8 %el11, 1
  %mul12 = mul i8 %el12, 1
  %mul13 = mul i8 %el13, 1
  %mul14 = mul i8 %el14, 1
  %mul15 = mul i8 %el15, 1
  %add0 = add i8 %mul0, 1
  %add1 = add i8 %mul1, 1
  %add2 = add i8 %mul2, 1
  %add3 = add i8 %mul3, 1
  %add4 = add i8 %mul4, 1
  %add5 = add i8 %mul5, 1
  %add6 = add i8 %mul6, 1
  %add7 = add i8 %mul7, 1
  %add8 = add i8 %mul8, 1
  %add9 = add i8 %mul9, 1
  %add10 = add i8 %mul10, 1
  %add11 = add i8 %mul11, 1
  %add12 = add i8 %mul12, 1
  %add13 = add i8 %mul13, 1
  %add14 = add i8 %mul14, 1
  %add15 = add i8 %mul15, 1
  %vecins0 = insertelement <16 x i8> poison, i8 %add0, i64 0
  %vecins1 = insertelement <16 x i8> %vecins0, i8 %add1, i64 1
  %vecins2 = insertelement <16 x i8> %vecins1, i8 %add2, i64 2
  %vecins3 = insertelement <16 x i8> %vecins2, i8 %add3, i64 3
  %vecins4 = insertelement <16 x i8> %vecins3, i8 %add4, i64 4
  %vecins5 = insertelement <16 x i8> %vecins4, i8 %add5, i64 5
  %vecins6 = insertelement <16 x i8> %vecins5, i8 %add6, i64 6
  %vecins7 = insertelement <16 x i8> %vecins6, i8 %add7, i64 7
  %vecins8 = insertelement <16 x i8> %vecins7, i8 %add8, i64 8
  %vecins9 = insertelement <16 x i8> %vecins8, i8 %add9, i64 9
  %vecins10 = insertelement <16 x i8> %vecins9, i8 %add10, i64 10
  %vecins11 = insertelement <16 x i8> %vecins10, i8 %add11, i64 11
  %vecins12 = insertelement <16 x i8> %vecins11, i8 %add12, i64 12
  %vecins13 = insertelement <16 x i8> %vecins12, i8 %add13, i64 13
  %vecins14 = insertelement <16 x i8> %vecins13, i8 %add14, i64 14
  %vecins15 = insertelement <16 x i8> %vecins14, i8 %add15, i64 15
  store <16 x i8> %vecins15, ptr %out
  ret void
}

; phi_2 -- two i8 values carried around a loop through phi nodes.
; The entry block loads the bytes at offsets 0 and 1 from %inptr0
; (addrspace(3)). Each iteration of do.body reloads both bytes, builds one
; <16 x i8> from the reloaded pair and one from the phi pair (both placed in
; lanes 8 and 9 of a poison vector), and stores the phi-based vector to
; %inptr1. The loop exits once %flag compares equal to 0, after which the
; phi-based vector is stored to %out and the reloaded one to %out1.
; Expected output per the assertions below -- the hawaii run keeps scalar i8
; loads and phis, while the fiji and gfx900 runs use a <2 x i8> load in each
; block and a single <2 x i8> phi widened by shufflevector.
define protected amdgpu_kernel void @phi_2(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) {
; GFX7-LABEL: define protected amdgpu_kernel void @phi_2(
; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX7-NEXT: [[ENTRY:.*]]:
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
; GFX7-NEXT: br label %[[DO_BODY:.*]]
; GFX7: [[DO_BODY]]:
; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ]
; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ]
; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
; GFX7-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
; GFX7-NEXT: store <16 x i8> [[VEC111]], ptr addrspace(3) [[INPTR1]], align 2
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
; GFX7: [[EXIT]]:
; GFX7-NEXT: store <16 x i8> [[VEC111]], ptr [[OUT]], align 16
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
; GFX7-NEXT: ret void
;
; GFX8-LABEL: define protected amdgpu_kernel void @phi_2(
; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX8-NEXT: [[ENTRY:.*]]:
; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
; GFX8-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
; GFX8-NEXT: br label %[[DO_BODY:.*]]
; GFX8: [[DO_BODY]]:
; GFX8-NEXT: [[TMP1:%.*]] = phi <2 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
; GFX8-NEXT: [[TMP2]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: [[VEC111:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX8-NEXT: store <16 x i8> [[VEC111]], ptr addrspace(3) [[INPTR1]], align 2
; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
; GFX8: [[EXIT]]:
; GFX8-NEXT: store <16 x i8> [[VEC111]], ptr [[OUT]], align 16
; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
; GFX8-NEXT: ret void
;
; GFX9-LABEL: define protected amdgpu_kernel void @phi_2(
; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[ENTRY:.*]]:
; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
; GFX9-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
; GFX9-NEXT: br label %[[DO_BODY:.*]]
; GFX9: [[DO_BODY]]:
; GFX9-NEXT: [[TMP1:%.*]] = phi <2 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
; GFX9-NEXT: [[TMP2]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: [[VEC111:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; GFX9-NEXT: store <16 x i8> [[VEC111]], ptr addrspace(3) [[INPTR1]], align 2
; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
; GFX9: [[EXIT]]:
; GFX9-NEXT: store <16 x i8> [[VEC111]], ptr [[OUT]], align 16
; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
; GFX9-NEXT: ret void
;
entry:
  %gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0
  %ele0 = load i8, ptr addrspace(3) %gep0, align 8
  %gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1
  %ele1 = load i8, ptr addrspace(3) %gep1, align 1
  br label %do.body

do.body:
  %phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ]
  %phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ]
  %otherele0 = load i8, ptr addrspace(3) %gep0, align 8
  %otherele1 = load i8, ptr addrspace(3) %gep1, align 1
  %vec00 = insertelement <16 x i8> poison, i8 %otherele0, i64 8
  %vec01 = insertelement <16 x i8> %vec00, i8 %otherele1, i64 9
  %vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8
  %vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9
  store <16 x i8> %vec11, ptr addrspace(3) %inptr1, align 2
  %cmp = icmp eq i32 %flag, 0
  br i1 %cmp, label %exit, label %do.body

exit:
  store <16 x i8> %vec11, ptr %out
  store <16 x i8> %vec01, ptr %out1
  ret void
}

; phi_3 carries three adjacent i8 values around a single-block loop.
; The entry block loads bytes 0, 1 and 2 of %inptr0; the loop reloads the same
; three addresses on every iteration and feeds them back through three scalar
; phis. Both the reloaded bytes and the phi values are inserted into lanes 8-10
; of a <16 x i8> vector. Per the checks that follow, the hawaii run is expected
; to stay fully scalar, while the fiji and gfx900 runs combine bytes 0 and 1
; into <2 x i8> load/phi values and leave byte 2 scalar.
define protected amdgpu_kernel void @phi_3(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) {
|
|
; GFX7-LABEL: define protected amdgpu_kernel void @phi_3(
|
|
; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX7-NEXT: [[ENTRY:.*]]:
|
|
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
|
|
; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
|
|
; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
|
|
; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX7-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX7: [[DO_BODY]]:
|
|
; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
|
|
; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
|
|
; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
|
|
; GFX7-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[OTHERELE2]], i64 10
|
|
; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
|
|
; GFX7-NEXT: [[VEC111:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
|
|
; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC111]], i8 [[PHI1]], i64 10
|
|
; GFX7-NEXT: store <16 x i8> [[VEC12]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX7: [[EXIT]]:
|
|
; GFX7-NEXT: store <16 x i8> [[VEC12]], ptr [[OUT]], align 16
|
|
; GFX7-NEXT: store <16 x i8> [[VEC02]], ptr [[OUT1]], align 16
|
|
; GFX7-NEXT: ret void
|
|
;
|
|
; GFX8-LABEL: define protected amdgpu_kernel void @phi_3(
|
|
; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX8-NEXT: [[ENTRY:.*]]:
|
|
; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX8-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX8-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
|
|
; GFX8-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX8-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX8: [[DO_BODY]]:
|
|
; GFX8-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ]
|
|
; GFX8-NEXT: [[TMP1:%.*]] = phi <2 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
|
|
; GFX8-NEXT: [[TMP2]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX8-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX8-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[OTHERELE2]], i64 10
|
|
; GFX8-NEXT: [[VEC111:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX8-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC111]], i8 [[PHI1]], i64 10
|
|
; GFX8-NEXT: store <16 x i8> [[VEC12]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX8: [[EXIT]]:
|
|
; GFX8-NEXT: store <16 x i8> [[VEC12]], ptr [[OUT]], align 16
|
|
; GFX8-NEXT: store <16 x i8> [[VEC02]], ptr [[OUT1]], align 16
|
|
; GFX8-NEXT: ret void
|
|
;
|
|
; GFX9-LABEL: define protected amdgpu_kernel void @phi_3(
|
|
; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX9-NEXT: [[ENTRY:.*]]:
|
|
; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX9-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX9-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
|
|
; GFX9-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX9-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX9: [[DO_BODY]]:
|
|
; GFX9-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ]
|
|
; GFX9-NEXT: [[TMP1:%.*]] = phi <2 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
|
|
; GFX9-NEXT: [[TMP2]] = load <2 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX9-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX9-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[TMP3]], i8 [[OTHERELE2]], i64 10
|
|
; GFX9-NEXT: [[VEC111:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX9-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC111]], i8 [[PHI1]], i64 10
|
|
; GFX9-NEXT: store <16 x i8> [[VEC12]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX9: [[EXIT]]:
|
|
; GFX9-NEXT: store <16 x i8> [[VEC12]], ptr [[OUT]], align 16
|
|
; GFX9-NEXT: store <16 x i8> [[VEC02]], ptr [[OUT1]], align 16
|
|
; GFX9-NEXT: ret void
|
|
;
|
|
; Initial values of the three loop-carried bytes (offsets 0, 1, 2 of %inptr0).
entry:
|
|
%gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0
|
|
%ele0 = load i8, ptr addrspace(3) %gep0, align 8
|
|
%gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1
|
|
%ele1 = load i8, ptr addrspace(3) %gep1, align 1
|
|
%gep2 = getelementptr i8, ptr addrspace(3) %inptr0, i32 2
|
|
%ele2 = load i8, ptr addrspace(3) %gep2, align 2
|
|
br label %do.body
|
|
|
|
do.body:
|
|
; The phis are declared in descending byte order: %phi1 carries byte 2 and
; %phi3 carries byte 0.
%phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ]
|
|
%phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ]
|
|
%phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ]
|
|
; Reload the same three addresses; these values feed the phis on the back edge.
%otherele0 = load i8, ptr addrspace(3) %gep0, align 8
|
|
%otherele1 = load i8, ptr addrspace(3) %gep1, align 1
|
|
%otherele2 = load i8, ptr addrspace(3) %gep2, align 2
|
|
; Pack the reloaded bytes into lanes 8-10.
%vec00 = insertelement <16 x i8> poison, i8 %otherele0, i64 8
|
|
%vec01 = insertelement <16 x i8> %vec00, i8 %otherele1, i64 9
|
|
%vec02 = insertelement <16 x i8> %vec01, i8 %otherele2, i64 10
|
|
; Pack the loop-carried (phi) bytes into lanes 8-10.
%vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8
|
|
%vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9
|
|
%vec12 = insertelement <16 x i8> %vec11, i8 %phi1, i64 10
|
|
store <16 x i8> %vec12, ptr addrspace(3) %inptr1, align 2
|
|
; Branch to exit when %flag is 0, otherwise take the back edge.
%cmp = icmp eq i32 %flag, 0
|
|
br i1 %cmp, label %exit, label %do.body
|
|
|
|
exit:
|
|
; Both packed vectors are used after the loop.
store <16 x i8> %vec12, ptr %out
|
|
store <16 x i8> %vec02, ptr %out1
|
|
ret void
|
|
}
|
|
|
|
; phi_4 is the four-byte form of the loop-carried i8 pattern.
; The entry block loads bytes 0-3 of %inptr0; the loop reloads the same four
; addresses on every iteration and feeds them back through four scalar phis.
; Both the reloaded bytes and the phi values are inserted into lanes 8-11 of a
; <16 x i8> vector. Per the checks that follow, the hawaii run is expected to
; stay fully scalar, while the fiji and gfx900 runs use a single <4 x i8> load
; and phi that are widened to <16 x i8> with shufflevector.
define protected amdgpu_kernel void @phi_4(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) {
|
|
; GFX7-LABEL: define protected amdgpu_kernel void @phi_4(
|
|
; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX7-NEXT: [[ENTRY:.*]]:
|
|
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX7-NEXT: [[ELE0:%.*]] = load i8, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX7-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 1
|
|
; GFX7-NEXT: [[ELE1:%.*]] = load i8, ptr addrspace(3) [[GEP1]], align 1
|
|
; GFX7-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 2
|
|
; GFX7-NEXT: [[ELE2:%.*]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX7-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 3
|
|
; GFX7-NEXT: [[ELE3:%.*]] = load i8, ptr addrspace(3) [[GEP3]], align 1
|
|
; GFX7-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX7: [[DO_BODY]]:
|
|
; GFX7-NEXT: [[PHI0:%.*]] = phi i8 [ [[ELE3]], %[[ENTRY]] ], [ [[OTHERELE3:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[PHI1:%.*]] = phi i8 [ [[ELE2]], %[[ENTRY]] ], [ [[OTHERELE2:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[PHI2:%.*]] = phi i8 [ [[ELE1]], %[[ENTRY]] ], [ [[OTHERELE1:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[PHI3:%.*]] = phi i8 [ [[ELE0]], %[[ENTRY]] ], [ [[OTHERELE0:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[OTHERELE0]] = load i8, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX7-NEXT: [[OTHERELE1]] = load i8, ptr addrspace(3) [[GEP1]], align 1
|
|
; GFX7-NEXT: [[OTHERELE2]] = load i8, ptr addrspace(3) [[GEP2]], align 2
|
|
; GFX7-NEXT: [[OTHERELE3]] = load i8, ptr addrspace(3) [[GEP3]], align 1
|
|
; GFX7-NEXT: [[VEC00:%.*]] = insertelement <16 x i8> poison, i8 [[OTHERELE0]], i64 8
|
|
; GFX7-NEXT: [[VEC01:%.*]] = insertelement <16 x i8> [[VEC00]], i8 [[OTHERELE1]], i64 9
|
|
; GFX7-NEXT: [[VEC02:%.*]] = insertelement <16 x i8> [[VEC01]], i8 [[OTHERELE2]], i64 10
|
|
; GFX7-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[VEC02]], i8 [[OTHERELE3]], i64 11
|
|
; GFX7-NEXT: [[VEC10:%.*]] = insertelement <16 x i8> poison, i8 [[PHI3]], i64 8
|
|
; GFX7-NEXT: [[VEC11:%.*]] = insertelement <16 x i8> [[VEC10]], i8 [[PHI2]], i64 9
|
|
; GFX7-NEXT: [[VEC12:%.*]] = insertelement <16 x i8> [[VEC11]], i8 [[PHI1]], i64 10
|
|
; GFX7-NEXT: [[VEC131:%.*]] = insertelement <16 x i8> [[VEC12]], i8 [[PHI0]], i64 11
|
|
; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX7: [[EXIT]]:
|
|
; GFX7-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT]], align 16
|
|
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
|
|
; GFX7-NEXT: ret void
|
|
;
|
|
; GFX8-LABEL: define protected amdgpu_kernel void @phi_4(
|
|
; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX8-NEXT: [[ENTRY:.*]]:
|
|
; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX8-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX8-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX8: [[DO_BODY]]:
|
|
; GFX8-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
|
|
; GFX8-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX8-NEXT: [[VEC131:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX8-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX8: [[EXIT]]:
|
|
; GFX8-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT]], align 16
|
|
; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
|
|
; GFX8-NEXT: ret void
|
|
;
|
|
; GFX9-LABEL: define protected amdgpu_kernel void @phi_4(
|
|
; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX9-NEXT: [[ENTRY:.*]]:
|
|
; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX9-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX9-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX9: [[DO_BODY]]:
|
|
; GFX9-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
|
|
; GFX9-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX9-NEXT: [[VEC131:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX9-NEXT: store <16 x i8> [[VEC131]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX9: [[EXIT]]:
|
|
; GFX9-NEXT: store <16 x i8> [[VEC131]], ptr [[OUT]], align 16
|
|
; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
|
|
; GFX9-NEXT: ret void
|
|
;
|
|
; Initial values of the four loop-carried bytes (offsets 0-3 of %inptr0).
entry:
|
|
%gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0
|
|
%ele0 = load i8, ptr addrspace(3) %gep0, align 8
|
|
%gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1
|
|
%ele1 = load i8, ptr addrspace(3) %gep1, align 1
|
|
%gep2 = getelementptr i8, ptr addrspace(3) %inptr0, i32 2
|
|
%ele2 = load i8, ptr addrspace(3) %gep2, align 2
|
|
%gep3 = getelementptr i8, ptr addrspace(3) %inptr0, i32 3
|
|
%ele3 = load i8, ptr addrspace(3) %gep3, align 1
|
|
br label %do.body
|
|
|
|
do.body:
|
|
; The phis are declared in descending byte order: %phi0 carries byte 3 and
; %phi3 carries byte 0.
%phi0 = phi i8 [ %ele3, %entry ], [ %otherele3, %do.body ]
|
|
%phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ]
|
|
%phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ]
|
|
%phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ]
|
|
; Reload the same four addresses; these values feed the phis on the back edge.
%otherele0 = load i8, ptr addrspace(3) %gep0, align 8
|
|
%otherele1 = load i8, ptr addrspace(3) %gep1, align 1
|
|
%otherele2 = load i8, ptr addrspace(3) %gep2, align 2
|
|
%otherele3 = load i8, ptr addrspace(3) %gep3, align 1
|
|
; Pack the reloaded bytes into lanes 8-11.
%vec00 = insertelement <16 x i8> poison, i8 %otherele0, i64 8
|
|
%vec01 = insertelement <16 x i8> %vec00, i8 %otherele1, i64 9
|
|
%vec02 = insertelement <16 x i8> %vec01, i8 %otherele2, i64 10
|
|
%vec03 = insertelement <16 x i8> %vec02, i8 %otherele3, i64 11
|
|
; Pack the loop-carried (phi) bytes into lanes 8-11.
%vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8
|
|
%vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9
|
|
%vec12 = insertelement <16 x i8> %vec11, i8 %phi1, i64 10
|
|
%vec13 = insertelement <16 x i8> %vec12, i8 %phi0, i64 11
|
|
store <16 x i8> %vec13, ptr addrspace(3) %inptr1, align 2
|
|
; Branch to exit when %flag is 0, otherwise take the back edge.
%cmp = icmp eq i32 %flag, 0
|
|
br i1 %cmp, label %exit, label %do.body
|
|
|
|
exit:
|
|
; Both packed vectors are used after the loop.
store <16 x i8> %vec13, ptr %out
|
|
store <16 x i8> %vec03, ptr %out1
|
|
ret void
|
|
}
|
|
|
|
; phi_4_with_stores extends the four-byte loop-carried pattern with scalar
; stores: inside the loop each phi value is written back to the address its
; byte was loaded from, and the phi values are also inserted into lanes 8-11 of
; a <16 x i8> vector. Unlike phi_4, the reloaded bytes are not packed into a
; vector of their own; the phi vector is stored to %inptr1, %out and %out1.
; Per the checks that follow, all three runs (hawaii, fiji, gfx900) are
; expected to use a <4 x i8> load, phi and store.
define protected amdgpu_kernel void @phi_4_with_stores(ptr addrspace(3) %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr %out1, i32 %flag) {
|
|
; GFX7-LABEL: define protected amdgpu_kernel void @phi_4_with_stores(
|
|
; GFX7-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX7-NEXT: [[ENTRY:.*]]:
|
|
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX7-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX7-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX7: [[DO_BODY]]:
|
|
; GFX7-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX7-NEXT: store <4 x i8> [[TMP1]], ptr addrspace(3) [[GEP0]], align 2
|
|
; GFX7-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX7: [[EXIT]]:
|
|
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
|
|
; GFX7-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
|
|
; GFX7-NEXT: ret void
|
|
;
|
|
; GFX8-LABEL: define protected amdgpu_kernel void @phi_4_with_stores(
|
|
; GFX8-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX8-NEXT: [[ENTRY:.*]]:
|
|
; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX8-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX8-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX8: [[DO_BODY]]:
|
|
; GFX8-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
|
|
; GFX8-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX8-NEXT: store <4 x i8> [[TMP1]], ptr addrspace(3) [[GEP0]], align 2
|
|
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX8: [[EXIT]]:
|
|
; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
|
|
; GFX8-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
|
|
; GFX8-NEXT: ret void
|
|
;
|
|
; GFX9-LABEL: define protected amdgpu_kernel void @phi_4_with_stores(
|
|
; GFX9-SAME: ptr addrspace(3) [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX9-NEXT: [[ENTRY:.*]]:
|
|
; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR0]], i32 0
|
|
; GFX9-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX9-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX9: [[DO_BODY]]:
|
|
; GFX9-NEXT: [[TMP1:%.*]] = phi <4 x i8> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[DO_BODY]] ]
|
|
; GFX9-NEXT: [[TMP2]] = load <4 x i8>, ptr addrspace(3) [[GEP0]], align 8
|
|
; GFX9-NEXT: store <4 x i8> [[TMP1]], ptr addrspace(3) [[GEP0]], align 2
|
|
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr addrspace(3) [[INPTR1]], align 2
|
|
; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX9: [[EXIT]]:
|
|
; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT]], align 16
|
|
; GFX9-NEXT: store <16 x i8> [[TMP3]], ptr [[OUT1]], align 16
|
|
; GFX9-NEXT: ret void
|
|
;
|
|
; Initial values of the four loop-carried bytes (offsets 0-3 of %inptr0).
entry:
|
|
%gep0 = getelementptr i8, ptr addrspace(3) %inptr0, i32 0
|
|
%ele0 = load i8, ptr addrspace(3) %gep0, align 8
|
|
%gep1 = getelementptr i8, ptr addrspace(3) %inptr0, i32 1
|
|
%ele1 = load i8, ptr addrspace(3) %gep1, align 1
|
|
%gep2 = getelementptr i8, ptr addrspace(3) %inptr0, i32 2
|
|
%ele2 = load i8, ptr addrspace(3) %gep2, align 2
|
|
%gep3 = getelementptr i8, ptr addrspace(3) %inptr0, i32 3
|
|
%ele3 = load i8, ptr addrspace(3) %gep3, align 1
|
|
br label %do.body
|
|
|
|
do.body:
|
|
; The phis are declared in descending byte order: %phi0 carries byte 3 and
; %phi3 carries byte 0.
%phi0 = phi i8 [ %ele3, %entry ], [ %otherele3, %do.body ]
|
|
%phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ]
|
|
%phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ]
|
|
%phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ]
|
|
; Reload the same four addresses; these values feed the phis on the back edge.
%otherele0 = load i8, ptr addrspace(3) %gep0, align 8
|
|
%otherele1 = load i8, ptr addrspace(3) %gep1, align 1
|
|
%otherele2 = load i8, ptr addrspace(3) %gep2, align 2
|
|
%otherele3 = load i8, ptr addrspace(3) %gep3, align 1
|
|
; Write each loop-carried byte back to the address it was loaded from.
; NOTE(review): %gep1 and %gep3 are loaded with align 1 above but stored with
; align 2 here - confirm the stronger store alignment is intentional.
store i8 %phi3, ptr addrspace(3) %gep0, align 2
|
|
store i8 %phi2, ptr addrspace(3) %gep1, align 2
|
|
store i8 %phi1, ptr addrspace(3) %gep2, align 2
|
|
store i8 %phi0, ptr addrspace(3) %gep3, align 2
|
|
; Pack the loop-carried (phi) bytes into lanes 8-11.
%vec10 = insertelement <16 x i8> poison, i8 %phi3, i64 8
|
|
%vec11 = insertelement <16 x i8> %vec10, i8 %phi2, i64 9
|
|
%vec12 = insertelement <16 x i8> %vec11, i8 %phi1, i64 10
|
|
%vec13 = insertelement <16 x i8> %vec12, i8 %phi0, i64 11
|
|
store <16 x i8> %vec13, ptr addrspace(3) %inptr1, align 2
|
|
; Branch to exit when %flag is 0, otherwise take the back edge.
%cmp = icmp eq i32 %flag, 0
|
|
br i1 %cmp, label %exit, label %do.body
|
|
|
|
exit:
|
|
; The same packed phi vector is written to both output pointers.
store <16 x i8> %vec13, ptr %out
|
|
store <16 x i8> %vec13, ptr %out1
|
|
ret void
|
|
}
|
|
|
|
; phi_4_with_stores_outside_loop feeds the four i8 phis from extractelement
; instead of loads, and stores through scalar i8 stores both inside the loop
; and in the exit block. Despite its name, %inptr0 is a <4 x i8> value, not a
; pointer; %out is declared but not used by the body. Per the checks that
; follow, all three runs (hawaii, fiji, gfx900) are expected to use one
; <4 x i8> phi and one <4 x i8> store in the loop, plus one <4 x i8> store of
; %inptr0 itself in the exit block.
define protected amdgpu_kernel void @phi_4_with_stores_outside_loop(<4 x i8> %inptr0, ptr addrspace(3) %inptr1, ptr %out, ptr addrspace(3) %out1, i32 %flag) {
|
|
; GFX7-LABEL: define protected amdgpu_kernel void @phi_4_with_stores_outside_loop(
|
|
; GFX7-SAME: <4 x i8> [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr addrspace(3) [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX7-NEXT: [[ENTRY:.*]]:
|
|
; GFX7-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX7: [[DO_BODY]]:
|
|
; GFX7-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ [[INPTR0]], %[[ENTRY]] ], [ [[INPTR0]], %[[DO_BODY]] ]
|
|
; GFX7-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR1]], i32 8
|
|
; GFX7-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(3) [[GEP4]], align 2
|
|
; GFX7-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX7-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX7: [[EXIT]]:
|
|
; GFX7-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 0
|
|
; GFX7-NEXT: store <4 x i8> [[INPTR0]], ptr addrspace(3) [[GEP0]], align 1
|
|
; GFX7-NEXT: ret void
|
|
;
|
|
; GFX8-LABEL: define protected amdgpu_kernel void @phi_4_with_stores_outside_loop(
|
|
; GFX8-SAME: <4 x i8> [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr addrspace(3) [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX8-NEXT: [[ENTRY:.*]]:
|
|
; GFX8-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX8: [[DO_BODY]]:
|
|
; GFX8-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ [[INPTR0]], %[[ENTRY]] ], [ [[INPTR0]], %[[DO_BODY]] ]
|
|
; GFX8-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR1]], i32 8
|
|
; GFX8-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(3) [[GEP4]], align 2
|
|
; GFX8-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX8-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX8: [[EXIT]]:
|
|
; GFX8-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 0
|
|
; GFX8-NEXT: store <4 x i8> [[INPTR0]], ptr addrspace(3) [[GEP0]], align 1
|
|
; GFX8-NEXT: ret void
|
|
;
|
|
; GFX9-LABEL: define protected amdgpu_kernel void @phi_4_with_stores_outside_loop(
|
|
; GFX9-SAME: <4 x i8> [[INPTR0:%.*]], ptr addrspace(3) [[INPTR1:%.*]], ptr [[OUT:%.*]], ptr addrspace(3) [[OUT1:%.*]], i32 [[FLAG:%.*]]) #[[ATTR0]] {
|
|
; GFX9-NEXT: [[ENTRY:.*]]:
|
|
; GFX9-NEXT: br label %[[DO_BODY:.*]]
|
|
; GFX9: [[DO_BODY]]:
|
|
; GFX9-NEXT: [[TMP0:%.*]] = phi <4 x i8> [ [[INPTR0]], %[[ENTRY]] ], [ [[INPTR0]], %[[DO_BODY]] ]
|
|
; GFX9-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr addrspace(3) [[INPTR1]], i32 8
|
|
; GFX9-NEXT: store <4 x i8> [[TMP0]], ptr addrspace(3) [[GEP4]], align 2
|
|
; GFX9-NEXT: [[CMP:%.*]] = icmp eq i32 [[FLAG]], 0
|
|
; GFX9-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[DO_BODY]]
|
|
; GFX9: [[EXIT]]:
|
|
; GFX9-NEXT: [[GEP0:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT1]], i32 0
|
|
; GFX9-NEXT: store <4 x i8> [[INPTR0]], ptr addrspace(3) [[GEP0]], align 1
|
|
; GFX9-NEXT: ret void
|
|
;
|
|
; Initial phi values: the four elements of the %inptr0 vector argument.
entry:
|
|
%ele0 = extractelement <4 x i8> %inptr0, i32 0
|
|
%ele1 = extractelement <4 x i8> %inptr0, i32 1
|
|
%ele2 = extractelement <4 x i8> %inptr0, i32 2
|
|
%ele3 = extractelement <4 x i8> %inptr0, i32 3
|
|
br label %do.body
|
|
|
|
do.body:
|
|
; The phis are declared in descending element order: %phi0 carries element 3
; and %phi3 carries element 0.
%phi0 = phi i8 [ %ele3, %entry ], [ %otherele3, %do.body ]
|
|
%phi1 = phi i8 [ %ele2, %entry ], [ %otherele2, %do.body ]
|
|
%phi2 = phi i8 [ %ele1, %entry ], [ %otherele1, %do.body ]
|
|
%phi3 = phi i8 [ %ele0, %entry ], [ %otherele0, %do.body ]
|
|
; The back-edge values are extracted from the same input vector, so each phi
; receives the same element of %inptr0 on both incoming edges.
%otherele0 = extractelement <4 x i8> %inptr0, i32 0
|
|
%otherele1 = extractelement <4 x i8> %inptr0, i32 1
|
|
%otherele2 = extractelement <4 x i8> %inptr0, i32 2
|
|
%otherele3 = extractelement <4 x i8> %inptr0, i32 3
|
|
; Store the phi values to four consecutive bytes at offsets 8-11 of %inptr1.
%gep4 = getelementptr i8, ptr addrspace(3) %inptr1, i32 8
|
|
store i8 %phi3, ptr addrspace(3) %gep4, align 2
|
|
%gep5 = getelementptr i8, ptr addrspace(3) %inptr1, i32 9
|
|
store i8 %phi2, ptr addrspace(3) %gep5, align 2
|
|
%gep6 = getelementptr i8, ptr addrspace(3) %inptr1, i32 10
|
|
store i8 %phi1, ptr addrspace(3) %gep6, align 2
|
|
%gep7 = getelementptr i8, ptr addrspace(3) %inptr1, i32 11
|
|
store i8 %phi0, ptr addrspace(3) %gep7, align 2
|
|
; Branch to exit when %flag is 0, otherwise take the back edge.
%cmp = icmp eq i32 %flag, 0
|
|
br i1 %cmp, label %exit, label %do.body
|
|
|
|
exit:
|
|
; Store the elements extracted inside the loop to offsets 0-3 of %out1. These
; stores carry no explicit alignment; the checks above expect the combined
; store to use align 1.
%gep0 = getelementptr i8, ptr addrspace(3) %out1, i32 0
|
|
%gep1 = getelementptr i8, ptr addrspace(3) %out1, i32 1
|
|
%gep2 = getelementptr i8, ptr addrspace(3) %out1, i32 2
|
|
%gep3 = getelementptr i8, ptr addrspace(3) %out1, i32 3
|
|
store i8 %otherele0, ptr addrspace(3) %gep0
|
|
store i8 %otherele1, ptr addrspace(3) %gep1
|
|
store i8 %otherele2, ptr addrspace(3) %gep2
|
|
store i8 %otherele3, ptr addrspace(3) %gep3
|
|
ret void
|
|
}
|