An ACC register is a combination of four consecutive vector registers. If the vector registers are assigned first, this often forces a number of copies to appear just before the ACC register is created. If the ACC register is assigned first, fewer copies are generated when the vector registers are assigned. This patch tries to force the register allocator to assign the ACC registers first, then the UACC registers, and then the vector pair registers, by changing the priority of the register classes. It also adds hints to help the register allocator assign UACC registers from known ACC registers and vector pair registers from known UACC registers.

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D105854
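For readers unfamiliar with the hinting mechanism mentioned above, the sketch below shows the general shape of a `TargetRegisterInfo::getRegAllocationHints` override. It is an illustrative sketch only, not the code from this patch: the class name `MyPPCRegisterInfo` and the particular hint logic (hinting the super-register that contains an already-assigned copy source) are assumptions made for the example.

```cpp
// Illustrative sketch only (not the code from D105854): how a target could
// hint the allocator that, when a wide virtual register is built by a COPY
// into one of its sub-registers, the physical register containing the
// already-assigned source at that sub-register index is a good candidate.
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"

using namespace llvm;

bool MyPPCRegisterInfo::getRegAllocationHints( // hypothetical subclass
    Register VirtReg, ArrayRef<MCPhysReg> Order,
    SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
    const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
  // Keep the target-independent hints (e.g. plain copy hints).
  bool BaseRet = TargetRegisterInfo::getRegAllocationHints(VirtReg, Order,
                                                           Hints, MF, VRM,
                                                           Matrix);
  if (!VRM)
    return BaseRet;

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  for (const MachineInstr &MI : MRI.def_instructions(VirtReg)) {
    // Look for "%wide.subidx = COPY %narrow" where %narrow already has a
    // physical register assigned; the matching super-register of that
    // physical register is then pushed as a hint for %wide.
    if (!MI.isCopy())
      continue;
    const MachineOperand &Dst = MI.getOperand(0);
    const MachineOperand &Src = MI.getOperand(1);
    if (Dst.getReg() != VirtReg || !Dst.getSubReg())
      continue;
    Register SrcReg = Src.getReg();
    if (!SrcReg.isVirtual() || !VRM->hasPhys(SrcReg))
      continue;
    MCRegister SrcPhys = VRM->getPhys(SrcReg);
    if (MCRegister Super = getMatchingSuperReg(SrcPhys, Dst.getSubReg(),
                                               MRI.getRegClass(VirtReg)))
      Hints.push_back(Super);
  }
  return BaseRet;
}
```

The autogenerated test that follows exercises the resulting allocation order for the MMA accumulators used by xvf64gerpp, under both the default run and the -ppc-track-subreg-liveness run.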
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s --check-prefix=TRACKLIVE

%0 = type <{ double }>
%1 = type <{ double }>

define void @acc_regalloc(i32* %arg, [0 x %0]* %arg1, [0 x %1]* %arg2) local_unnamed_addr {
; CHECK-LABEL: acc_regalloc:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: lwz r3, 0(r3)
; CHECK-NEXT: lxv v4, 0(0)
; CHECK-NEXT: xxlxor v0, v0, v0
; CHECK-NEXT: xxlxor v1, v1, v1
; CHECK-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
; CHECK-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; CHECK-NEXT: xxlxor v2, v2, v2
; CHECK-NEXT: li r6, 1
; CHECK-NEXT: li r4, 16
; CHECK-NEXT: extswsli r3, r3, 3
; CHECK-NEXT: xvmaddadp v1, v4, v1
; CHECK-NEXT: lxvdsx v5, 0, r3
; CHECK-NEXT: xvmaddadp v0, v5, v0
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb9
; CHECK-NEXT: #
; CHECK-NEXT: addi r6, r6, 2
; CHECK-NEXT: lxv vs1, -64(r5)
; CHECK-NEXT: lxv vs2, -16(r5)
; CHECK-NEXT: lxv vs0, 16(0)
; CHECK-NEXT: vmr v9, v0
; CHECK-NEXT: xxlxor v10, v10, v10
; CHECK-NEXT: xxlxor v7, v7, v7
; CHECK-NEXT: mulld r6, r6, r3
; CHECK-NEXT: xvmaddadp v9, vs1, v2
; CHECK-NEXT: xxlxor v8, v8, v8
; CHECK-NEXT: xvmaddadp v10, vs2, v10
; CHECK-NEXT: xvmaddadp v7, vs0, v5
; CHECK-NEXT: xvmuldp v6, vs0, v2
; CHECK-NEXT: xvmaddadp v7, v2, v2
; CHECK-NEXT: xvmaddadp v6, v2, v2
; CHECK-NEXT: lxvdsx v14, r6, r4
; CHECK-NEXT: xvmaddadp v8, vs1, v8
; CHECK-NEXT: li r6, 0
; CHECK-NEXT: xvmuldp v11, vs2, v14
; CHECK-NEXT: xvmuldp v3, vs1, v14
; CHECK-NEXT: xvmuldp vs5, v14, v2
; CHECK-NEXT: xvmuldp v13, v4, v14
; CHECK-NEXT: vmr v12, v2
; CHECK-NEXT: xxlor vs14, v10, v10
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs4, v2, v2
; CHECK-NEXT: # kill: def $vsrp2 killed $vsrp2 def $uacc1
; CHECK-NEXT: xxlor vs6, v6, v6
; CHECK-NEXT: xxlor vs7, v7, v7
; CHECK-NEXT: xxlor vs8, v12, v12
; CHECK-NEXT: xxlor vs9, v13, v13
; CHECK-NEXT: vmr v12, v1
; CHECK-NEXT: xxlor vs15, v11, v11
; CHECK-NEXT: vmr v10, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v8, v8
; CHECK-NEXT: xxlor vs3, v9, v9
; CHECK-NEXT: xxlor vs10, v12, v12
; CHECK-NEXT: xxlor vs11, v13, v13
; CHECK-NEXT: xxmtacc acc1
; CHECK-NEXT: xxlor vs12, v10, v10
; CHECK-NEXT: xxlor vs13, v11, v11
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmtacc acc2
; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xxmtacc acc3
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc0, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc1, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc2, vsp34, vs0
; CHECK-NEXT: xvf64gerpp acc3, vsp34, vs0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: xxmfacc acc1
; CHECK-NEXT: xxmfacc acc2
; CHECK-NEXT: xxmfacc acc3
; CHECK-NEXT: stxv vs1, 0(r3)
; CHECK-NEXT: stxv vs9, 32(r3)
; CHECK-NEXT: stxv vs4, 16(0)
; CHECK-NEXT: stxv vs12, 48(0)
; CHECK-NEXT: b .LBB0_1
;
; TRACKLIVE-LABEL: acc_regalloc:
; TRACKLIVE: # %bb.0: # %bb
; TRACKLIVE-NEXT: lwz r3, 0(r3)
; TRACKLIVE-NEXT: lxv v4, 0(0)
; TRACKLIVE-NEXT: xxlxor v0, v0, v0
; TRACKLIVE-NEXT: xxlxor v1, v1, v1
; TRACKLIVE-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill
; TRACKLIVE-NEXT: xxlxor v2, v2, v2
; TRACKLIVE-NEXT: li r6, 1
; TRACKLIVE-NEXT: li r4, 16
; TRACKLIVE-NEXT: extswsli r3, r3, 3
; TRACKLIVE-NEXT: xvmaddadp v1, v4, v1
; TRACKLIVE-NEXT: lxvdsx v5, 0, r3
; TRACKLIVE-NEXT: xvmaddadp v0, v5, v0
; TRACKLIVE-NEXT: .p2align 4
; TRACKLIVE-NEXT: .LBB0_1: # %bb9
; TRACKLIVE-NEXT: #
; TRACKLIVE-NEXT: addi r6, r6, 2
; TRACKLIVE-NEXT: lxv vs0, 16(0)
; TRACKLIVE-NEXT: xxlxor vs7, vs7, vs7
; TRACKLIVE-NEXT: lxv vs1, -64(r5)
; TRACKLIVE-NEXT: lxv vs4, -16(r5)
; TRACKLIVE-NEXT: xxlxor vs12, vs12, vs12
; TRACKLIVE-NEXT: xxlor vs3, v0, v0
; TRACKLIVE-NEXT: xxlxor vs2, vs2, vs2
; TRACKLIVE-NEXT: mulld r6, r6, r3
; TRACKLIVE-NEXT: xxlor vs10, v2, v2
; TRACKLIVE-NEXT: xxlor vs8, vs10, vs10
; TRACKLIVE-NEXT: xxlor vs10, v1, v1
; TRACKLIVE-NEXT: xvmaddadp vs7, vs0, v5
; TRACKLIVE-NEXT: xvmuldp vs6, vs0, v2
; TRACKLIVE-NEXT: xvmaddadp vs12, vs4, vs12
; TRACKLIVE-NEXT: xvmaddadp vs3, vs1, v2
; TRACKLIVE-NEXT: xvmaddadp vs2, vs1, vs2
; TRACKLIVE-NEXT: xxlor vs0, v2, v2
; TRACKLIVE-NEXT: lxvdsx v6, r6, r4
; TRACKLIVE-NEXT: li r6, 0
; TRACKLIVE-NEXT: xvmaddadp vs7, v2, v2
; TRACKLIVE-NEXT: xvmaddadp vs6, v2, v2
; TRACKLIVE-NEXT: xxlor vs14, vs12, vs12
; TRACKLIVE-NEXT: xxlor vs12, v2, v2
; TRACKLIVE-NEXT: xvmuldp v3, vs1, v6
; TRACKLIVE-NEXT: xvmuldp vs11, v4, v6
; TRACKLIVE-NEXT: xvmuldp vs13, vs4, v6
; TRACKLIVE-NEXT: xvmuldp vs5, v6, v2
; TRACKLIVE-NEXT: xxlor vs4, v2, v2
; TRACKLIVE-NEXT: xxlor vs1, v3, v3
; TRACKLIVE-NEXT: xxlor vs9, vs11, vs11
; TRACKLIVE-NEXT: xxlor vs15, vs13, vs13
; TRACKLIVE-NEXT: xxmtacc acc1
; TRACKLIVE-NEXT: xxmtacc acc0
; TRACKLIVE-NEXT: xxmtacc acc2
; TRACKLIVE-NEXT: xxmtacc acc3
; TRACKLIVE-NEXT: xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc0, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc1, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc2, vsp34, vs0
; TRACKLIVE-NEXT: xvf64gerpp acc3, vsp34, vs0
; TRACKLIVE-NEXT: xxmfacc acc0
; TRACKLIVE-NEXT: xxmfacc acc1
; TRACKLIVE-NEXT: xxmfacc acc2
; TRACKLIVE-NEXT: xxmfacc acc3
; TRACKLIVE-NEXT: stxv vs1, 0(r3)
; TRACKLIVE-NEXT: stxv vs9, 32(r3)
; TRACKLIVE-NEXT: stxv vs4, 16(0)
; TRACKLIVE-NEXT: stxv vs12, 48(0)
; TRACKLIVE-NEXT: b .LBB0_1
bb:
%i = load i32, i32* %arg, align 4
%i3 = sext i32 %i to i64
%i4 = shl nsw i64 %i3, 3
%i5 = bitcast [0 x %0]* %arg1 to i8*
%i6 = getelementptr i8, i8* %i5, i64 undef
%i7 = getelementptr [0 x %1], [0 x %1]* %arg2, i64 0, i64 -8
%i8 = getelementptr i8, i8* %i6, i64 undef
br label %bb9

bb9: ; preds = %bb95, %bb
%i10 = phi i64 [ 1, %bb ], [ 0, %bb95 ]
%i11 = getelementptr %1, %1* null, i64 2
%i12 = bitcast %1* %i11 to <2 x double>*
%i13 = load <2 x double>, <2 x double>* %i12, align 1
%i14 = add nuw nsw i64 %i10, 2
%i15 = getelementptr inbounds %1, %1* %i7, i64 undef
%i16 = bitcast %1* %i15 to <2 x double>*
%i17 = load <2 x double>, <2 x double>* %i16, align 1
%i18 = load <2 x double>, <2 x double>* null, align 1
%i19 = getelementptr %1, %1* %i15, i64 6
%i20 = bitcast %1* %i19 to <2 x double>*
%i21 = load <2 x double>, <2 x double>* %i20, align 1
%i22 = load i64, i64* undef, align 8
%i23 = insertelement <2 x i64> poison, i64 %i22, i32 0
%i24 = bitcast <2 x i64> %i23 to <2 x double>
%i25 = shufflevector <2 x double> %i24, <2 x double> undef, <2 x i32> zeroinitializer
%i26 = mul i64 %i14, %i4
%i27 = getelementptr i8, i8* null, i64 %i26
%i28 = getelementptr inbounds i8, i8* %i27, i64 0
%i29 = getelementptr i8, i8* %i28, i64 16
%i30 = bitcast i8* %i29 to i64*
%i31 = load i64, i64* %i30, align 8
%i32 = insertelement <2 x i64> poison, i64 %i31, i32 0
%i33 = bitcast <2 x i64> %i32 to <2 x double>
%i34 = shufflevector <2 x double> %i33, <2 x double> undef, <2 x i32> zeroinitializer
%i35 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %i25, <2 x double> zeroinitializer)
%i36 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i13, <2 x double> %i25, <2 x double> zeroinitializer)
%i37 = fmul contract <2 x double> %i13, zeroinitializer
%i38 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> %i35)
%i39 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i36)
%i40 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i17, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
%i41 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> zeroinitializer, <2 x double> %i37)
%i42 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i18, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
%i43 = tail call contract <2 x double> @llvm.fma.v2f64(<2 x double> %i21, <2 x double> zeroinitializer, <2 x double> zeroinitializer)
%i44 = fmul contract <2 x double> %i17, %i34
%i45 = fmul contract <2 x double> zeroinitializer, %i34
%i46 = fmul contract <2 x double> %i18, %i34
%i47 = fmul contract <2 x double> %i21, %i34
%i48 = bitcast <2 x double> %i44 to <16 x i8>
%i49 = bitcast <2 x double> %i40 to <16 x i8>
%i50 = bitcast <2 x double> %i38 to <16 x i8>
%i51 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i48, <16 x i8> %i49, <16 x i8> %i50)
%i52 = bitcast <2 x double> %i45 to <16 x i8>
%i53 = bitcast <2 x double> %i41 to <16 x i8>
%i54 = bitcast <2 x double> %i39 to <16 x i8>
%i55 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i52, <16 x i8> %i53, <16 x i8> %i54)
%i56 = bitcast <2 x double> %i46 to <16 x i8>
%i57 = bitcast <2 x double> %i42 to <16 x i8>
%i58 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i56, <16 x i8> %i57, <16 x i8> %i56)
%i59 = bitcast <2 x double> %i47 to <16 x i8>
%i60 = bitcast <2 x double> %i43 to <16 x i8>
%i61 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> zeroinitializer, <16 x i8> %i59, <16 x i8> %i60, <16 x i8> %i59)
%i62 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i51, <256 x i1> undef, <16 x i8> undef)
%i63 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i55, <256 x i1> undef, <16 x i8> undef)
%i64 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i58, <256 x i1> undef, <16 x i8> undef)
%i65 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i61, <256 x i1> undef, <16 x i8> undef)
%i66 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i62, <256 x i1> undef, <16 x i8> undef)
%i67 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i63, <256 x i1> undef, <16 x i8> undef)
%i68 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i64, <256 x i1> undef, <16 x i8> undef)
%i69 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i65, <256 x i1> undef, <16 x i8> undef)
%i70 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i66, <256 x i1> undef, <16 x i8> undef)
%i71 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i67, <256 x i1> undef, <16 x i8> undef)
%i72 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i68, <256 x i1> undef, <16 x i8> undef)
%i73 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i69, <256 x i1> undef, <16 x i8> undef)
%i74 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i70, <256 x i1> undef, <16 x i8> undef)
%i75 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i71, <256 x i1> undef, <16 x i8> undef)
%i76 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i72, <256 x i1> undef, <16 x i8> undef)
%i77 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i73, <256 x i1> undef, <16 x i8> undef)
%i78 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i74, <256 x i1> undef, <16 x i8> undef)
%i79 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i75, <256 x i1> undef, <16 x i8> undef)
%i80 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i76, <256 x i1> undef, <16 x i8> undef)
%i81 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i77, <256 x i1> undef, <16 x i8> undef)
br label %bb82

bb82: ; preds = %bb82, %bb9
%i83 = phi <512 x i1> [ %i94, %bb82 ], [ %i81, %bb9 ]
%i84 = phi <512 x i1> [ %i93, %bb82 ], [ %i80, %bb9 ]
%i85 = phi <512 x i1> [ %i92, %bb82 ], [ %i79, %bb9 ]
%i86 = phi <512 x i1> [ %i91, %bb82 ], [ %i78, %bb9 ]
%i87 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i86, <256 x i1> undef, <16 x i8> undef)
%i88 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i85, <256 x i1> undef, <16 x i8> undef)
%i89 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i84, <256 x i1> undef, <16 x i8> undef)
%i90 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i83, <256 x i1> undef, <16 x i8> undef)
%i91 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i87, <256 x i1> undef, <16 x i8> undef)
%i92 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i88, <256 x i1> undef, <16 x i8> undef)
%i93 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i89, <256 x i1> undef, <16 x i8> undef)
%i94 = tail call <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1> %i90, <256 x i1> undef, <16 x i8> undef)
br i1 undef, label %bb95, label %bb82

bb95: ; preds = %bb82
%i96 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i91)
%i97 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i96, 2
%i98 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i92)
%i99 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i98, 3
%i100 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i93)
%i101 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i100, 2
%i102 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %i94)
%i103 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %i102, 3
%i104 = getelementptr inbounds i8, i8* %i8, i64 undef
%i105 = bitcast i8* %i104 to <16 x i8>*
store <16 x i8> %i97, <16 x i8>* %i105, align 1
%i106 = getelementptr i8, i8* %i104, i64 32
%i107 = bitcast i8* %i106 to <16 x i8>*
store <16 x i8> %i101, <16 x i8>* %i107, align 1
%i108 = getelementptr i8, i8* null, i64 16
%i109 = bitcast i8* %i108 to <16 x i8>*
store <16 x i8> %i99, <16 x i8>* %i109, align 1
%i110 = getelementptr i8, i8* null, i64 48
%i111 = bitcast i8* %i110 to <16 x i8>*
store <16 x i8> %i103, <16 x i8>* %i111, align 1
br label %bb9
}

declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.xvf64gerpp(<512 x i1>, <256 x i1>, <16 x i8>)
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)