;; #pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable ;; #pragma OPENCL EXTENSION cl_khr_fp16 : enable ;; #pragma OPENCL EXTENSION cl_khr_fp64 : enable ;; ;; kernel void testNonUniformBroadcastChars() ;; { ;; char16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastUChars() ;; { ;; uchar16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastShorts() ;; { ;; short16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastUShorts() ;; { ;; ushort16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastInts() ;; { ;; int16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastUInts() ;; { ;; uint16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastLongs() ;; { ;; long16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastULongs() ;; { ;; ulong16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastFloats() ;; { ;; float16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastHalfs() ;; { ;; half16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testNonUniformBroadcastDoubles() ;; { ;; double16 v = 0; ;; v.s0 = sub_group_non_uniform_broadcast(v.s0, 0); ;; v.s01 = sub_group_non_uniform_broadcast(v.s01, 0); ;; v.s012 = sub_group_non_uniform_broadcast(v.s012, 0); ;; v.s0123 = sub_group_non_uniform_broadcast(v.s0123, 0); ;; v.s01234567 = sub_group_non_uniform_broadcast(v.s01234567, 0); ;; v = sub_group_non_uniform_broadcast(v, 0); ;; v.s0 = sub_group_broadcast_first(v.s0); ;; } ;; ;; kernel void testBallotOperations(global uint* dst) ;; { ;; uint4 v = sub_group_ballot(0); ;; dst[0] = sub_group_inverse_ballot(v); ;; dst[1] = sub_group_ballot_bit_extract(v, 0); ;; dst[2] = sub_group_ballot_bit_count(v); ;; dst[3] = sub_group_ballot_inclusive_scan(v); ;; dst[4] = sub_group_ballot_exclusive_scan(v); ;; dst[5] = sub_group_ballot_find_lsb(v); ;; dst[6] = sub_group_ballot_find_msb(v); ;; } ;; ;; kernel void testSubgroupMasks(global uint4* dst) ;; { ;; dst[0] = get_sub_group_eq_mask(); ;; dst[1] = get_sub_group_ge_mask(); ;; dst[2] = get_sub_group_gt_mask(); ;; dst[3] = get_sub_group_le_mask(); ;; dst[4] = get_sub_group_lt_mask(); ;; } ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; CHECK-SPIRV-DAG: OpCapability GroupNonUniformBallot ; CHECK-SPIRV-DAG: OpDecorate %[[#eqMask:]] BuiltIn SubgroupEqMask ; CHECK-SPIRV-DAG: OpDecorate %[[#geMask:]] BuiltIn SubgroupGeMask ; CHECK-SPIRV-DAG: OpDecorate %[[#gtMask:]] BuiltIn SubgroupGtMask ; CHECK-SPIRV-DAG: OpDecorate %[[#leMask:]] BuiltIn SubgroupLeMask ; CHECK-SPIRV-DAG: OpDecorate %[[#ltMask:]] BuiltIn SubgroupLtMask ; CHECK-SPIRV-DAG: %[[#bool:]] = OpTypeBool ; CHECK-SPIRV-DAG: %[[#char:]] = OpTypeInt 8 0 ; CHECK-SPIRV-DAG: %[[#short:]] = OpTypeInt 16 0 ; CHECK-SPIRV-DAG: %[[#int:]] = OpTypeInt 32 0 ; CHECK-SPIRV-DAG: %[[#long:]] = OpTypeInt 64 0 ; CHECK-SPIRV-DAG: %[[#half:]] = OpTypeFloat 16 ; CHECK-SPIRV-DAG: %[[#float:]] = OpTypeFloat 32 ; CHECK-SPIRV-DAG: %[[#double:]] = OpTypeFloat 64 ; CHECK-SPIRV-DAG: %[[#char2:]] = OpTypeVector %[[#char]] 2 ; CHECK-SPIRV-DAG: %[[#char3:]] = OpTypeVector %[[#char]] 3 ; CHECK-SPIRV-DAG: %[[#char4:]] = OpTypeVector %[[#char]] 4 ; CHECK-SPIRV-DAG: %[[#char8:]] = OpTypeVector %[[#char]] 8 ; CHECK-SPIRV-DAG: %[[#char16:]] = OpTypeVector %[[#char]] 16 ; CHECK-SPIRV-DAG: %[[#short2:]] = OpTypeVector %[[#short]] 2 ; CHECK-SPIRV-DAG: %[[#short3:]] = OpTypeVector %[[#short]] 3 ; CHECK-SPIRV-DAG: %[[#short4:]] = OpTypeVector %[[#short]] 4 ; CHECK-SPIRV-DAG: %[[#short8:]] = OpTypeVector %[[#short]] 8 ; CHECK-SPIRV-DAG: %[[#short16:]] = OpTypeVector %[[#short]] 16 ; CHECK-SPIRV-DAG: %[[#int2:]] = OpTypeVector %[[#int]] 2 ; CHECK-SPIRV-DAG: %[[#int3:]] = OpTypeVector %[[#int]] 3 ; CHECK-SPIRV-DAG: %[[#int4:]] = OpTypeVector %[[#int]] 4 ; CHECK-SPIRV-DAG: %[[#int8:]] = OpTypeVector %[[#int]] 8 ; CHECK-SPIRV-DAG: %[[#int16:]] = OpTypeVector %[[#int]] 16 ; CHECK-SPIRV-DAG: %[[#long2:]] = OpTypeVector %[[#long]] 2 ; CHECK-SPIRV-DAG: %[[#long3:]] = OpTypeVector %[[#long]] 3 ; CHECK-SPIRV-DAG: %[[#long4:]] = OpTypeVector %[[#long]] 4 ; CHECK-SPIRV-DAG: %[[#long8:]] = OpTypeVector %[[#long]] 8 ; CHECK-SPIRV-DAG: %[[#long16:]] = OpTypeVector %[[#long]] 16 ; CHECK-SPIRV-DAG: %[[#float2:]] = OpTypeVector %[[#float]] 2 ; CHECK-SPIRV-DAG: %[[#float3:]] = OpTypeVector %[[#float]] 3 ; CHECK-SPIRV-DAG: %[[#float4:]] = OpTypeVector %[[#float]] 4 ; CHECK-SPIRV-DAG: %[[#float8:]] = OpTypeVector %[[#float]] 8 ; CHECK-SPIRV-DAG: %[[#float16:]] = OpTypeVector %[[#float]] 16 ; CHECK-SPIRV-DAG: %[[#half2:]] = OpTypeVector %[[#half]] 2 ; CHECK-SPIRV-DAG: %[[#half3:]] = OpTypeVector %[[#half]] 3 ; CHECK-SPIRV-DAG: %[[#half4:]] = OpTypeVector %[[#half]] 4 ; CHECK-SPIRV-DAG: %[[#half8:]] = OpTypeVector %[[#half]] 8 ; CHECK-SPIRV-DAG: %[[#half16:]] = OpTypeVector %[[#half]] 16 ; CHECK-SPIRV-DAG: %[[#double2:]] = OpTypeVector %[[#double]] 2 ; CHECK-SPIRV-DAG: %[[#double3:]] = OpTypeVector %[[#double]] 3 ; CHECK-SPIRV-DAG: %[[#double4:]] = OpTypeVector %[[#double]] 4 ; CHECK-SPIRV-DAG: %[[#double8:]] = OpTypeVector %[[#double]] 8 ; CHECK-SPIRV-DAG: %[[#double16:]] = OpTypeVector %[[#double]] 16 ; CHECK-SPIRV-DAG: %[[#false:]] = OpConstantFalse %[[#bool]] ; CHECK-SPIRV-DAG: %[[#ScopeSubgroup:]] = OpConstant %[[#int]] 3 ; CHECK-SPIRV-DAG: %[[#char_0:]] = OpConstant %[[#char]] 0 ; CHECK-SPIRV-DAG: %[[#short_0:]] = OpConstant %[[#short]] 0 ; CHECK-SPIRV-DAG: %[[#int_0:]] = OpConstant %[[#int]] 0 ; CHECK-SPIRV-DAG: %[[#long_0:]] = OpConstantNull %[[#long]] ; CHECK-SPIRV-DAG: %[[#half_0:]] = OpConstant %[[#half]] 0 ; CHECK-SPIRV-DAG: %[[#float_0:]] = OpConstant %[[#float]] 0 ; CHECK-SPIRV-DAG: %[[#double_0:]] = OpConstant %[[#double]] 0 ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]] ; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char_value:]] = OpCompositeExtract %[[#char]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#char]] %[[#ScopeSubgroup]] %[[#char_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastChars() local_unnamed_addr { %1 = tail call spir_func signext i8 @_Z31sub_group_non_uniform_broadcastcj(i8 signext 0, i32 0) %2 = insertelement <16 x i8> , i8 %1, i64 0 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> %4 = tail call spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_cj(<2 x i8> %3, i32 0) %5 = shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> %6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> %8 = tail call spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_cj(<3 x i8> %7, i32 0) %9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> %10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> %11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> %12 = tail call spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_cj(<4 x i8> %11, i32 0) %13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> %14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> %15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> %16 = tail call spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_cj(<8 x i8> %15, i32 0) %17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> %18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> %19 = tail call spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_cj(<16 x i8> %18, i32 0) %20 = extractelement <16 x i8> %19, i64 0 %21 = tail call spir_func signext i8 @_Z25sub_group_broadcast_firstc(i8 signext %20) ret void } declare dso_local spir_func signext i8 @_Z31sub_group_non_uniform_broadcastcj(i8 signext, i32) local_unnamed_addr declare dso_local spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_cj(<2 x i8>, i32) local_unnamed_addr declare dso_local spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_cj(<3 x i8>, i32) local_unnamed_addr declare dso_local spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_cj(<4 x i8>, i32) local_unnamed_addr declare dso_local spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_cj(<8 x i8>, i32) local_unnamed_addr declare dso_local spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_cj(<16 x i8>, i32) local_unnamed_addr declare dso_local spir_func signext i8 @_Z25sub_group_broadcast_firstc(i8 signext) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char]] %[[#ScopeSubgroup]] %[[#char_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char2_0:]] = OpVectorShuffle %[[#char2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char2]] %[[#ScopeSubgroup]] %[[#char2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char3_0:]] = OpVectorShuffle %[[#char3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char3]] %[[#ScopeSubgroup]] %[[#char3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char4_0:]] = OpVectorShuffle %[[#char4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char4]] %[[#ScopeSubgroup]] %[[#char4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char8_0:]] = OpVectorShuffle %[[#char8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char8]] %[[#ScopeSubgroup]] %[[#char8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#char16]] ; CHECK-SPIRV: %[[#char16_0:]] = OpVectorShuffle %[[#char16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#char16]] %[[#ScopeSubgroup]] %[[#char16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#char_value:]] = OpCompositeExtract %[[#char]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#char]] %[[#ScopeSubgroup]] %[[#char_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastUChars() local_unnamed_addr { %1 = tail call spir_func zeroext i8 @_Z31sub_group_non_uniform_broadcasthj(i8 zeroext 0, i32 0) %2 = insertelement <16 x i8> , i8 %1, i64 0 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <2 x i32> %4 = tail call spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_hj(<2 x i8> %3, i32 0) %5 = shufflevector <2 x i8> %4, <2 x i8> undef, <16 x i32> %6 = shufflevector <16 x i8> %5, <16 x i8> %2, <16 x i32> %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <3 x i32> %8 = tail call spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_hj(<3 x i8> %7, i32 0) %9 = shufflevector <3 x i8> %8, <3 x i8> undef, <16 x i32> %10 = shufflevector <16 x i8> %9, <16 x i8> %6, <16 x i32> %11 = shufflevector <16 x i8> %10, <16 x i8> undef, <4 x i32> %12 = tail call spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_hj(<4 x i8> %11, i32 0) %13 = shufflevector <4 x i8> %12, <4 x i8> undef, <16 x i32> %14 = shufflevector <16 x i8> %13, <16 x i8> %10, <16 x i32> %15 = shufflevector <16 x i8> %14, <16 x i8> undef, <8 x i32> %16 = tail call spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_hj(<8 x i8> %15, i32 0) %17 = shufflevector <8 x i8> %16, <8 x i8> undef, <16 x i32> %18 = shufflevector <16 x i8> %17, <16 x i8> %14, <16 x i32> %19 = tail call spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_hj(<16 x i8> %18, i32 0) %20 = extractelement <16 x i8> %19, i64 0 %21 = tail call spir_func zeroext i8 @_Z25sub_group_broadcast_firsth(i8 zeroext %20) ret void } declare dso_local spir_func zeroext i8 @_Z31sub_group_non_uniform_broadcasthj(i8 zeroext, i32) local_unnamed_addr declare dso_local spir_func <2 x i8> @_Z31sub_group_non_uniform_broadcastDv2_hj(<2 x i8>, i32) local_unnamed_addr declare dso_local spir_func <3 x i8> @_Z31sub_group_non_uniform_broadcastDv3_hj(<3 x i8>, i32) local_unnamed_addr declare dso_local spir_func <4 x i8> @_Z31sub_group_non_uniform_broadcastDv4_hj(<4 x i8>, i32) local_unnamed_addr declare dso_local spir_func <8 x i8> @_Z31sub_group_non_uniform_broadcastDv8_hj(<8 x i8>, i32) local_unnamed_addr declare dso_local spir_func <16 x i8> @_Z31sub_group_non_uniform_broadcastDv16_hj(<16 x i8>, i32) local_unnamed_addr declare dso_local spir_func zeroext i8 @_Z25sub_group_broadcast_firsth(i8 zeroext) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#short16]] ; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short_value:]] = OpCompositeExtract %[[#short]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#short]] %[[#ScopeSubgroup]] %[[#short_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastShorts() local_unnamed_addr { %1 = tail call spir_func signext i16 @_Z31sub_group_non_uniform_broadcastsj(i16 signext 0, i32 0) %2 = insertelement <16 x i16> , i16 %1, i64 0 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> %4 = tail call spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_sj(<2 x i16> %3, i32 0) %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> %6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> %8 = tail call spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_sj(<3 x i16> %7, i32 0) %9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> %10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> %11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> %12 = tail call spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_sj(<4 x i16> %11, i32 0) %13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> %14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> %15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> %16 = tail call spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_sj(<8 x i16> %15, i32 0) %17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> %18 = shufflevector <16 x i16> %17, <16 x i16> %14, <16 x i32> %19 = tail call spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_sj(<16 x i16> %18, i32 0) %20 = extractelement <16 x i16> %19, i64 0 %21 = tail call spir_func signext i16 @_Z25sub_group_broadcast_firsts(i16 signext %20) ret void } declare dso_local spir_func signext i16 @_Z31sub_group_non_uniform_broadcastsj(i16 signext, i32) local_unnamed_addr declare dso_local spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_sj(<2 x i16>, i32) local_unnamed_addr declare dso_local spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_sj(<3 x i16>, i32) local_unnamed_addr declare dso_local spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_sj(<4 x i16>, i32) local_unnamed_addr declare dso_local spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_sj(<8 x i16>, i32) local_unnamed_addr declare dso_local spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_sj(<16 x i16>, i32) local_unnamed_addr declare dso_local spir_func signext i16 @_Z25sub_group_broadcast_firsts(i16 signext) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short]] %[[#ScopeSubgroup]] %[[#short_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short2_0:]] = OpVectorShuffle %[[#short2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short2]] %[[#ScopeSubgroup]] %[[#short2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short3_0:]] = OpVectorShuffle %[[#short3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short3]] %[[#ScopeSubgroup]] %[[#short3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short4_0:]] = OpVectorShuffle %[[#short4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short4]] %[[#ScopeSubgroup]] %[[#short4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short8_0:]] = OpVectorShuffle %[[#short8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short8]] %[[#ScopeSubgroup]] %[[#short8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#short16]] ; CHECK-SPIRV: %[[#short16_0:]] = OpVectorShuffle %[[#short16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#short16]] %[[#ScopeSubgroup]] %[[#short16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#short_value:]] = OpCompositeExtract %[[#short]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#short]] %[[#ScopeSubgroup]] %[[#short_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastUShorts() local_unnamed_addr { %1 = tail call spir_func zeroext i16 @_Z31sub_group_non_uniform_broadcasttj(i16 zeroext 0, i32 0) %2 = insertelement <16 x i16> , i16 %1, i64 0 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <2 x i32> %4 = tail call spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_tj(<2 x i16> %3, i32 0) %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <16 x i32> %6 = shufflevector <16 x i16> %5, <16 x i16> %2, <16 x i32> %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <3 x i32> %8 = tail call spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_tj(<3 x i16> %7, i32 0) %9 = shufflevector <3 x i16> %8, <3 x i16> undef, <16 x i32> %10 = shufflevector <16 x i16> %9, <16 x i16> %6, <16 x i32> %11 = shufflevector <16 x i16> %10, <16 x i16> undef, <4 x i32> %12 = tail call spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_tj(<4 x i16> %11, i32 0) %13 = shufflevector <4 x i16> %12, <4 x i16> undef, <16 x i32> %14 = shufflevector <16 x i16> %13, <16 x i16> %10, <16 x i32> %15 = shufflevector <16 x i16> %14, <16 x i16> undef, <8 x i32> %16 = tail call spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_tj(<8 x i16> %15, i32 0) %17 = shufflevector <8 x i16> %16, <8 x i16> undef, <16 x i32> %18 = shufflevector <16 x i16> %17, <16 x i16> %14, <16 x i32> %19 = tail call spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_tj(<16 x i16> %18, i32 0) %20 = extractelement <16 x i16> %19, i64 0 %21 = tail call spir_func zeroext i16 @_Z25sub_group_broadcast_firstt(i16 zeroext %20) ret void } declare dso_local spir_func zeroext i16 @_Z31sub_group_non_uniform_broadcasttj(i16 zeroext, i32) local_unnamed_addr declare dso_local spir_func <2 x i16> @_Z31sub_group_non_uniform_broadcastDv2_tj(<2 x i16>, i32) local_unnamed_addr declare dso_local spir_func <3 x i16> @_Z31sub_group_non_uniform_broadcastDv3_tj(<3 x i16>, i32) local_unnamed_addr declare dso_local spir_func <4 x i16> @_Z31sub_group_non_uniform_broadcastDv4_tj(<4 x i16>, i32) local_unnamed_addr declare dso_local spir_func <8 x i16> @_Z31sub_group_non_uniform_broadcastDv8_tj(<8 x i16>, i32) local_unnamed_addr declare dso_local spir_func <16 x i16> @_Z31sub_group_non_uniform_broadcastDv16_tj(<16 x i16>, i32) local_unnamed_addr declare dso_local spir_func zeroext i16 @_Z25sub_group_broadcast_firstt(i16 zeroext) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]] ; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int_value:]] = OpCompositeExtract %[[#int]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#int]] %[[#ScopeSubgroup]] %[[#int_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastInts() local_unnamed_addr { %1 = tail call spir_func i32 @_Z31sub_group_non_uniform_broadcastij(i32 0, i32 0) %2 = insertelement <16 x i32> , i32 %1, i64 0 %3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> %4 = tail call spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_ij(<2 x i32> %3, i32 0) %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> %6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> %8 = tail call spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_ij(<3 x i32> %7, i32 0) %9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> %10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> %11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> %12 = tail call spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_ij(<4 x i32> %11, i32 0) %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> %14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> %15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> %16 = tail call spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_ij(<8 x i32> %15, i32 0) %17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> %18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> %19 = tail call spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_ij(<16 x i32> %18, i32 0) %20 = extractelement <16 x i32> %19, i64 0 %21 = tail call spir_func i32 @_Z25sub_group_broadcast_firsti(i32 %20) ret void } declare dso_local spir_func i32 @_Z31sub_group_non_uniform_broadcastij(i32, i32) local_unnamed_addr declare dso_local spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_ij(<2 x i32>, i32) local_unnamed_addr declare dso_local spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_ij(<3 x i32>, i32) local_unnamed_addr declare dso_local spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_ij(<4 x i32>, i32) local_unnamed_addr declare dso_local spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_ij(<8 x i32>, i32) local_unnamed_addr declare dso_local spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_ij(<16 x i32>, i32) local_unnamed_addr declare dso_local spir_func i32 @_Z25sub_group_broadcast_firsti(i32) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int]] %[[#ScopeSubgroup]] %[[#int_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int2_0:]] = OpVectorShuffle %[[#int2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int2]] %[[#ScopeSubgroup]] %[[#int2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int3_0:]] = OpVectorShuffle %[[#int3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int3]] %[[#ScopeSubgroup]] %[[#int3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int4_0:]] = OpVectorShuffle %[[#int4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int4]] %[[#ScopeSubgroup]] %[[#int4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int8_0:]] = OpVectorShuffle %[[#int8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int8]] %[[#ScopeSubgroup]] %[[#int8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#int16]] ; CHECK-SPIRV: %[[#int16_0:]] = OpVectorShuffle %[[#int16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#int16]] %[[#ScopeSubgroup]] %[[#int16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#int_value:]] = OpCompositeExtract %[[#int]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#int]] %[[#ScopeSubgroup]] %[[#int_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastUInts() local_unnamed_addr { %1 = tail call spir_func i32 @_Z31sub_group_non_uniform_broadcastjj(i32 0, i32 0) %2 = insertelement <16 x i32> , i32 %1, i64 0 %3 = shufflevector <16 x i32> %2, <16 x i32> undef, <2 x i32> %4 = tail call spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_jj(<2 x i32> %3, i32 0) %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <16 x i32> %6 = shufflevector <16 x i32> %5, <16 x i32> %2, <16 x i32> %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <3 x i32> %8 = tail call spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_jj(<3 x i32> %7, i32 0) %9 = shufflevector <3 x i32> %8, <3 x i32> undef, <16 x i32> %10 = shufflevector <16 x i32> %9, <16 x i32> %6, <16 x i32> %11 = shufflevector <16 x i32> %10, <16 x i32> undef, <4 x i32> %12 = tail call spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_jj(<4 x i32> %11, i32 0) %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <16 x i32> %14 = shufflevector <16 x i32> %13, <16 x i32> %10, <16 x i32> %15 = shufflevector <16 x i32> %14, <16 x i32> undef, <8 x i32> %16 = tail call spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_jj(<8 x i32> %15, i32 0) %17 = shufflevector <8 x i32> %16, <8 x i32> undef, <16 x i32> %18 = shufflevector <16 x i32> %17, <16 x i32> %14, <16 x i32> %19 = tail call spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_jj(<16 x i32> %18, i32 0) %20 = extractelement <16 x i32> %19, i64 0 %21 = tail call spir_func i32 @_Z25sub_group_broadcast_firstj(i32 %20) ret void } declare dso_local spir_func i32 @_Z31sub_group_non_uniform_broadcastjj(i32, i32) local_unnamed_addr declare dso_local spir_func <2 x i32> @_Z31sub_group_non_uniform_broadcastDv2_jj(<2 x i32>, i32) local_unnamed_addr declare dso_local spir_func <3 x i32> @_Z31sub_group_non_uniform_broadcastDv3_jj(<3 x i32>, i32) local_unnamed_addr declare dso_local spir_func <4 x i32> @_Z31sub_group_non_uniform_broadcastDv4_jj(<4 x i32>, i32) local_unnamed_addr declare dso_local spir_func <8 x i32> @_Z31sub_group_non_uniform_broadcastDv8_jj(<8 x i32>, i32) local_unnamed_addr declare dso_local spir_func <16 x i32> @_Z31sub_group_non_uniform_broadcastDv16_jj(<16 x i32>, i32) local_unnamed_addr declare dso_local spir_func i32 @_Z25sub_group_broadcast_firstj(i32) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long4]] %[[#ScopeSubgroup]] %[[#long4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]] ; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long_value:]] = OpCompositeExtract %[[#long]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#long]] %[[#ScopeSubgroup]] %[[#long_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastLongs() local_unnamed_addr { %1 = tail call spir_func i64 @_Z31sub_group_non_uniform_broadcastlj(i64 0, i32 0) %2 = insertelement <16 x i64> , i64 %1, i64 0 %3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32> %4 = tail call spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_lj(<2 x i64> %3, i32 0) %5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32> %6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32> %7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32> %8 = tail call spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_lj(<3 x i64> %7, i32 0) %9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32> %10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32> %11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32> %12 = tail call spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_lj(<4 x i64> %11, i32 0) %13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32> %14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32> %15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32> %16 = tail call spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_lj(<8 x i64> %15, i32 0) %17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32> %18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32> %19 = tail call spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_lj(<16 x i64> %18, i32 0) %20 = extractelement <16 x i64> %19, i64 0 %21 = tail call spir_func i64 @_Z25sub_group_broadcast_firstl(i64 %20) ret void } declare dso_local spir_func i64 @_Z31sub_group_non_uniform_broadcastlj(i64, i32) local_unnamed_addr declare dso_local spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_lj(<2 x i64>, i32) local_unnamed_addr declare dso_local spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_lj(<3 x i64>, i32) local_unnamed_addr declare dso_local spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_lj(<4 x i64>, i32) local_unnamed_addr declare dso_local spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_lj(<8 x i64>, i32) local_unnamed_addr declare dso_local spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_lj(<16 x i64>, i32) local_unnamed_addr declare dso_local spir_func i64 @_Z25sub_group_broadcast_firstl(i64) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long]] %[[#ScopeSubgroup]] %[[#long_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long2_0:]] = OpVectorShuffle %[[#long2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long2]] %[[#ScopeSubgroup]] %[[#long2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long3_0:]] = OpVectorShuffle %[[#long3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long3]] %[[#ScopeSubgroup]] %[[#long3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long4_0:]] = OpVectorShuffle %[[#long4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long4]] %[[#ScopeSubgroup]] %[[#long4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long8_0:]] = OpVectorShuffle %[[#long8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long8]] %[[#ScopeSubgroup]] %[[#long8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#long16]] ; CHECK-SPIRV: %[[#long16_0:]] = OpVectorShuffle %[[#long16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#long16]] %[[#ScopeSubgroup]] %[[#long16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#long_value:]] = OpCompositeExtract %[[#long]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#long]] %[[#ScopeSubgroup]] %[[#long_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastULongs() local_unnamed_addr { %1 = tail call spir_func i64 @_Z31sub_group_non_uniform_broadcastmj(i64 0, i32 0) %2 = insertelement <16 x i64> , i64 %1, i64 0 %3 = shufflevector <16 x i64> %2, <16 x i64> undef, <2 x i32> %4 = tail call spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_mj(<2 x i64> %3, i32 0) %5 = shufflevector <2 x i64> %4, <2 x i64> undef, <16 x i32> %6 = shufflevector <16 x i64> %5, <16 x i64> %2, <16 x i32> %7 = shufflevector <16 x i64> %6, <16 x i64> undef, <3 x i32> %8 = tail call spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_mj(<3 x i64> %7, i32 0) %9 = shufflevector <3 x i64> %8, <3 x i64> undef, <16 x i32> %10 = shufflevector <16 x i64> %9, <16 x i64> %6, <16 x i32> %11 = shufflevector <16 x i64> %10, <16 x i64> undef, <4 x i32> %12 = tail call spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_mj(<4 x i64> %11, i32 0) %13 = shufflevector <4 x i64> %12, <4 x i64> undef, <16 x i32> %14 = shufflevector <16 x i64> %13, <16 x i64> %10, <16 x i32> %15 = shufflevector <16 x i64> %14, <16 x i64> undef, <8 x i32> %16 = tail call spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_mj(<8 x i64> %15, i32 0) %17 = shufflevector <8 x i64> %16, <8 x i64> undef, <16 x i32> %18 = shufflevector <16 x i64> %17, <16 x i64> %14, <16 x i32> %19 = tail call spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_mj(<16 x i64> %18, i32 0) %20 = extractelement <16 x i64> %19, i64 0 %21 = tail call spir_func i64 @_Z25sub_group_broadcast_firstm(i64 %20) ret void } declare dso_local spir_func i64 @_Z31sub_group_non_uniform_broadcastmj(i64, i32) local_unnamed_addr declare dso_local spir_func <2 x i64> @_Z31sub_group_non_uniform_broadcastDv2_mj(<2 x i64>, i32) local_unnamed_addr declare dso_local spir_func <3 x i64> @_Z31sub_group_non_uniform_broadcastDv3_mj(<3 x i64>, i32) local_unnamed_addr declare dso_local spir_func <4 x i64> @_Z31sub_group_non_uniform_broadcastDv4_mj(<4 x i64>, i32) local_unnamed_addr declare dso_local spir_func <8 x i64> @_Z31sub_group_non_uniform_broadcastDv8_mj(<8 x i64>, i32) local_unnamed_addr declare dso_local spir_func <16 x i64> @_Z31sub_group_non_uniform_broadcastDv16_mj(<16 x i64>, i32) local_unnamed_addr declare dso_local spir_func i64 @_Z25sub_group_broadcast_firstm(i64) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float]] %[[#ScopeSubgroup]] %[[#float_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#float2_0:]] = OpVectorShuffle %[[#float2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float2]] %[[#ScopeSubgroup]] %[[#float2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#float3_0:]] = OpVectorShuffle %[[#float3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float3]] %[[#ScopeSubgroup]] %[[#float3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#float4_0:]] = OpVectorShuffle %[[#float4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float4]] %[[#ScopeSubgroup]] %[[#float4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#float8_0:]] = OpVectorShuffle %[[#float8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float8]] %[[#ScopeSubgroup]] %[[#float8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#float16]] ; CHECK-SPIRV: %[[#float16_0:]] = OpVectorShuffle %[[#float16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#float16]] %[[#ScopeSubgroup]] %[[#float16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#float_value:]] = OpCompositeExtract %[[#float]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#float]] %[[#ScopeSubgroup]] %[[#float_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastFloats() local_unnamed_addr { %1 = tail call spir_func float @_Z31sub_group_non_uniform_broadcastfj(float 0.000000e+00, i32 0) %2 = insertelement <16 x float> , float %1, i64 0 %3 = shufflevector <16 x float> %2, <16 x float> undef, <2 x i32> %4 = tail call spir_func <2 x float> @_Z31sub_group_non_uniform_broadcastDv2_fj(<2 x float> %3, i32 0) %5 = shufflevector <2 x float> %4, <2 x float> undef, <16 x i32> %6 = shufflevector <16 x float> %5, <16 x float> %2, <16 x i32> %7 = shufflevector <16 x float> %6, <16 x float> undef, <3 x i32> %8 = tail call spir_func <3 x float> @_Z31sub_group_non_uniform_broadcastDv3_fj(<3 x float> %7, i32 0) %9 = shufflevector <3 x float> %8, <3 x float> undef, <16 x i32> %10 = shufflevector <16 x float> %9, <16 x float> %6, <16 x i32> %11 = shufflevector <16 x float> %10, <16 x float> undef, <4 x i32> %12 = tail call spir_func <4 x float> @_Z31sub_group_non_uniform_broadcastDv4_fj(<4 x float> %11, i32 0) %13 = shufflevector <4 x float> %12, <4 x float> undef, <16 x i32> %14 = shufflevector <16 x float> %13, <16 x float> %10, <16 x i32> %15 = shufflevector <16 x float> %14, <16 x float> undef, <8 x i32> %16 = tail call spir_func <8 x float> @_Z31sub_group_non_uniform_broadcastDv8_fj(<8 x float> %15, i32 0) %17 = shufflevector <8 x float> %16, <8 x float> undef, <16 x i32> %18 = shufflevector <16 x float> %17, <16 x float> %14, <16 x i32> %19 = tail call spir_func <16 x float> @_Z31sub_group_non_uniform_broadcastDv16_fj(<16 x float> %18, i32 0) %20 = extractelement <16 x float> %19, i64 0 %21 = tail call spir_func float @_Z25sub_group_broadcast_firstf(float %20) ret void } declare dso_local spir_func float @_Z31sub_group_non_uniform_broadcastfj(float, i32) local_unnamed_addr declare dso_local spir_func <2 x float> @_Z31sub_group_non_uniform_broadcastDv2_fj(<2 x float>, i32) local_unnamed_addr declare dso_local spir_func <3 x float> @_Z31sub_group_non_uniform_broadcastDv3_fj(<3 x float>, i32) local_unnamed_addr declare dso_local spir_func <4 x float> @_Z31sub_group_non_uniform_broadcastDv4_fj(<4 x float>, i32) local_unnamed_addr declare dso_local spir_func <8 x float> @_Z31sub_group_non_uniform_broadcastDv8_fj(<8 x float>, i32) local_unnamed_addr declare dso_local spir_func <16 x float> @_Z31sub_group_non_uniform_broadcastDv16_fj(<16 x float>, i32) local_unnamed_addr declare dso_local spir_func float @_Z25sub_group_broadcast_firstf(float) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half]] %[[#ScopeSubgroup]] %[[#half_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#half2_0:]] = OpVectorShuffle %[[#half2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half2]] %[[#ScopeSubgroup]] %[[#half2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#half3_0:]] = OpVectorShuffle %[[#half3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half3]] %[[#ScopeSubgroup]] %[[#half3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#half4_0:]] = OpVectorShuffle %[[#half4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half4]] %[[#ScopeSubgroup]] %[[#half4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#half8_0:]] = OpVectorShuffle %[[#half8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half8]] %[[#ScopeSubgroup]] %[[#half8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#half16]] ; CHECK-SPIRV: %[[#half16_0:]] = OpVectorShuffle %[[#half16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#half16]] %[[#ScopeSubgroup]] %[[#half16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#half_value:]] = OpCompositeExtract %[[#half]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#half]] %[[#ScopeSubgroup]] %[[#half_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastHalfs() local_unnamed_addr { %1 = tail call spir_func half @_Z31sub_group_non_uniform_broadcastDhj(half 0xH0000, i32 0) %2 = insertelement <16 x half> , half %1, i64 0 %3 = shufflevector <16 x half> %2, <16 x half> undef, <2 x i32> %4 = tail call spir_func <2 x half> @_Z31sub_group_non_uniform_broadcastDv2_Dhj(<2 x half> %3, i32 0) %5 = shufflevector <2 x half> %4, <2 x half> undef, <16 x i32> %6 = shufflevector <16 x half> %5, <16 x half> %2, <16 x i32> %7 = shufflevector <16 x half> %6, <16 x half> undef, <3 x i32> %8 = tail call spir_func <3 x half> @_Z31sub_group_non_uniform_broadcastDv3_Dhj(<3 x half> %7, i32 0) %9 = shufflevector <3 x half> %8, <3 x half> undef, <16 x i32> %10 = shufflevector <16 x half> %9, <16 x half> %6, <16 x i32> %11 = shufflevector <16 x half> %10, <16 x half> undef, <4 x i32> %12 = tail call spir_func <4 x half> @_Z31sub_group_non_uniform_broadcastDv4_Dhj(<4 x half> %11, i32 0) %13 = shufflevector <4 x half> %12, <4 x half> undef, <16 x i32> %14 = shufflevector <16 x half> %13, <16 x half> %10, <16 x i32> %15 = shufflevector <16 x half> %14, <16 x half> undef, <8 x i32> %16 = tail call spir_func <8 x half> @_Z31sub_group_non_uniform_broadcastDv8_Dhj(<8 x half> %15, i32 0) %17 = shufflevector <8 x half> %16, <8 x half> undef, <16 x i32> %18 = shufflevector <16 x half> %17, <16 x half> %14, <16 x i32> %19 = tail call spir_func <16 x half> @_Z31sub_group_non_uniform_broadcastDv16_Dhj(<16 x half> %18, i32 0) %20 = extractelement <16 x half> %19, i64 0 %21 = tail call spir_func half @_Z25sub_group_broadcast_firstDh(half %20) ret void } declare dso_local spir_func half @_Z31sub_group_non_uniform_broadcastDhj(half, i32) local_unnamed_addr declare dso_local spir_func <2 x half> @_Z31sub_group_non_uniform_broadcastDv2_Dhj(<2 x half>, i32) local_unnamed_addr declare dso_local spir_func <3 x half> @_Z31sub_group_non_uniform_broadcastDv3_Dhj(<3 x half>, i32) local_unnamed_addr declare dso_local spir_func <4 x half> @_Z31sub_group_non_uniform_broadcastDv4_Dhj(<4 x half>, i32) local_unnamed_addr declare dso_local spir_func <8 x half> @_Z31sub_group_non_uniform_broadcastDv8_Dhj(<8 x half>, i32) local_unnamed_addr declare dso_local spir_func <16 x half> @_Z31sub_group_non_uniform_broadcastDv16_Dhj(<16 x half>, i32) local_unnamed_addr declare dso_local spir_func half @_Z25sub_group_broadcast_firstDh(half) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double]] %[[#ScopeSubgroup]] %[[#double_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#double2_0:]] = OpVectorShuffle %[[#double2]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double2]] %[[#ScopeSubgroup]] %[[#double2_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#double3_0:]] = OpVectorShuffle %[[#double3]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double3]] %[[#ScopeSubgroup]] %[[#double3_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#double4_0:]] = OpVectorShuffle %[[#double4]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double4]] %[[#ScopeSubgroup]] %[[#double4_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#double8_0:]] = OpVectorShuffle %[[#double8]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double8]] %[[#ScopeSubgroup]] %[[#double8_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpVectorShuffle %[[#double16]] ; CHECK-SPIRV: %[[#double16_0:]] = OpVectorShuffle %[[#double16]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcast %[[#double16]] %[[#ScopeSubgroup]] %[[#double16_0]] %[[#int_0]] ; CHECK-SPIRV: %[[#double_value:]] = OpCompositeExtract %[[#double]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBroadcastFirst %[[#double]] %[[#ScopeSubgroup]] %[[#double_value]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testNonUniformBroadcastDoubles() local_unnamed_addr { %1 = tail call spir_func double @_Z31sub_group_non_uniform_broadcastdj(double 0.000000e+00, i32 0) %2 = insertelement <16 x double> , double %1, i64 0 %3 = shufflevector <16 x double> %2, <16 x double> undef, <2 x i32> %4 = tail call spir_func <2 x double> @_Z31sub_group_non_uniform_broadcastDv2_dj(<2 x double> %3, i32 0) %5 = shufflevector <2 x double> %4, <2 x double> undef, <16 x i32> %6 = shufflevector <16 x double> %5, <16 x double> %2, <16 x i32> %7 = shufflevector <16 x double> %6, <16 x double> undef, <3 x i32> %8 = tail call spir_func <3 x double> @_Z31sub_group_non_uniform_broadcastDv3_dj(<3 x double> %7, i32 0) %9 = shufflevector <3 x double> %8, <3 x double> undef, <16 x i32> %10 = shufflevector <16 x double> %9, <16 x double> %6, <16 x i32> %11 = shufflevector <16 x double> %10, <16 x double> undef, <4 x i32> %12 = tail call spir_func <4 x double> @_Z31sub_group_non_uniform_broadcastDv4_dj(<4 x double> %11, i32 0) %13 = shufflevector <4 x double> %12, <4 x double> undef, <16 x i32> %14 = shufflevector <16 x double> %13, <16 x double> %10, <16 x i32> %15 = shufflevector <16 x double> %14, <16 x double> undef, <8 x i32> %16 = tail call spir_func <8 x double> @_Z31sub_group_non_uniform_broadcastDv8_dj(<8 x double> %15, i32 0) %17 = shufflevector <8 x double> %16, <8 x double> undef, <16 x i32> %18 = shufflevector <16 x double> %17, <16 x double> %14, <16 x i32> %19 = tail call spir_func <16 x double> @_Z31sub_group_non_uniform_broadcastDv16_dj(<16 x double> %18, i32 0) %20 = extractelement <16 x double> %19, i64 0 %21 = tail call spir_func double @_Z25sub_group_broadcast_firstd(double %20) ret void } declare dso_local spir_func double @_Z31sub_group_non_uniform_broadcastdj(double, i32) local_unnamed_addr declare dso_local spir_func <2 x double> @_Z31sub_group_non_uniform_broadcastDv2_dj(<2 x double>, i32) local_unnamed_addr declare dso_local spir_func <3 x double> @_Z31sub_group_non_uniform_broadcastDv3_dj(<3 x double>, i32) local_unnamed_addr declare dso_local spir_func <4 x double> @_Z31sub_group_non_uniform_broadcastDv4_dj(<4 x double>, i32) local_unnamed_addr declare dso_local spir_func <8 x double> @_Z31sub_group_non_uniform_broadcastDv8_dj(<8 x double>, i32) local_unnamed_addr declare dso_local spir_func <16 x double> @_Z31sub_group_non_uniform_broadcastDv16_dj(<16 x double>, i32) local_unnamed_addr declare dso_local spir_func double @_Z25sub_group_broadcast_firstd(double) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#ballot:]] = OpGroupNonUniformBallot %[[#int4]] %[[#ScopeSubgroup]] %[[#false]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformInverseBallot %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitExtract %[[#bool]] %[[#ScopeSubgroup]] %[[#ballot]] %[[#int_0]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] Reduce %[[#ballot]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] InclusiveScan %[[#ballot]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotBitCount %[[#int]] %[[#ScopeSubgroup]] ExclusiveScan %[[#ballot]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindLSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]] ; CHECK-SPIRV: %[[#]] = OpGroupNonUniformBallotFindMSB %[[#int]] %[[#ScopeSubgroup]] %[[#ballot]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testBallotOperations(i32 addrspace(1)* nocapture) local_unnamed_addr { %2 = tail call spir_func <4 x i32> @_Z16sub_group_balloti(i32 0) %3 = tail call spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32> %2) store i32 %3, i32 addrspace(1)* %0, align 4 %4 = tail call spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32> %2, i32 0) %5 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 1 store i32 %4, i32 addrspace(1)* %5, align 4 %6 = tail call spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32> %2) %7 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 2 store i32 %6, i32 addrspace(1)* %7, align 4 %8 = tail call spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32> %2) %9 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 3 store i32 %8, i32 addrspace(1)* %9, align 4 %10 = tail call spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32> %2) %11 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 4 store i32 %10, i32 addrspace(1)* %11, align 4 %12 = tail call spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32> %2) %13 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 5 store i32 %12, i32 addrspace(1)* %13, align 4 %14 = tail call spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32> %2) %15 = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 6 store i32 %14, i32 addrspace(1)* %15, align 4 ret void } declare dso_local spir_func <4 x i32> @_Z16sub_group_balloti(i32) local_unnamed_addr declare dso_local spir_func i32 @_Z24sub_group_inverse_ballotDv4_j(<4 x i32>) local_unnamed_addr declare dso_local spir_func i32 @_Z28sub_group_ballot_bit_extractDv4_jj(<4 x i32>, i32) local_unnamed_addr declare dso_local spir_func i32 @_Z26sub_group_ballot_bit_countDv4_j(<4 x i32>) local_unnamed_addr declare dso_local spir_func i32 @_Z31sub_group_ballot_inclusive_scanDv4_j(<4 x i32>) local_unnamed_addr declare dso_local spir_func i32 @_Z31sub_group_ballot_exclusive_scanDv4_j(<4 x i32>) local_unnamed_addr declare dso_local spir_func i32 @_Z25sub_group_ballot_find_lsbDv4_j(<4 x i32>) local_unnamed_addr declare dso_local spir_func i32 @_Z25sub_group_ballot_find_msbDv4_j(<4 x i32>) local_unnamed_addr ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#eqMask]] ; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#geMask]] ; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#gtMask]] ; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#leMask]] ; CHECK-SPIRV: %[[#]] = OpLoad %[[#int4]] %[[#ltMask]] ; CHECK-SPIRV: OpFunctionEnd define dso_local spir_kernel void @testSubgroupMasks(<4 x i32> addrspace(1)* nocapture) local_unnamed_addr { %2 = tail call spir_func <4 x i32> @_Z21get_sub_group_eq_maskv() store <4 x i32> %2, <4 x i32> addrspace(1)* %0, align 16 %3 = tail call spir_func <4 x i32> @_Z21get_sub_group_ge_maskv() %4 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 1 store <4 x i32> %3, <4 x i32> addrspace(1)* %4, align 16 %5 = tail call spir_func <4 x i32> @_Z21get_sub_group_gt_maskv() %6 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 2 store <4 x i32> %5, <4 x i32> addrspace(1)* %6, align 16 %7 = tail call spir_func <4 x i32> @_Z21get_sub_group_le_maskv() %8 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 3 store <4 x i32> %7, <4 x i32> addrspace(1)* %8, align 16 %9 = tail call spir_func <4 x i32> @_Z21get_sub_group_lt_maskv() %10 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %0, i64 4 store <4 x i32> %9, <4 x i32> addrspace(1)* %10, align 16 ret void } declare dso_local spir_func <4 x i32> @_Z21get_sub_group_eq_maskv() local_unnamed_addr declare dso_local spir_func <4 x i32> @_Z21get_sub_group_ge_maskv() local_unnamed_addr declare dso_local spir_func <4 x i32> @_Z21get_sub_group_gt_maskv() local_unnamed_addr declare dso_local spir_func <4 x i32> @_Z21get_sub_group_le_maskv() local_unnamed_addr declare dso_local spir_func <4 x i32> @_Z21get_sub_group_lt_maskv() local_unnamed_addr