Unlike older ASICs GFX10+ have a lot of VGPRs. Therefore, it is possible to achieve high occupancy even with all or almost all addressable VGPRs used. Our scheduler was never tuned for this scenario. The VGPR Critical Limit threshold always comes very high, even if maximum occupancy is targeted. For example on gfx1100 it is set to 192 registers even with the requested occupancy 16. As a result scheduler starts prioritizing register pressure reduction very late and we easily end up spilling. This patch makes VGPR critical limit similar to what we would have on pre-gfx10 targets with much more limited VGPR budget while still trying to maintain occupancy as it does now. Pre-gfx10 ASICs shall not be affected as the limit shall be the same as before, and on gfx10+ it shall only affect regions where we have to spill. Fixes: SWDEV-377300 Differential Revision: https://reviews.llvm.org/D141876
296 lines
19 KiB
LLVM
296 lines
19 KiB
LLVM
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
|
|
|
|
; GCN-LABEL: {{^}}load_fma_store
|
|
; GCN-NOT: scratch_store
|
|
; ScratchSize: 0{{$}}
|
|
|
|
define amdgpu_kernel void @load_fma_store(ptr addrspace(3) nocapture readonly %arg, ptr addrspace(1) nocapture %arg1, i1 %cc) {
|
|
bb:
|
|
%vgpr0 = call <32 x i32> asm sideeffect "; def $0","=${v[0:31]}"()
|
|
%vgpr1 = call <32 x i32> asm sideeffect "; def $0","=${v[32:63]}"()
|
|
%vgpr2 = call <32 x i32> asm sideeffect "; def $0","=${v[64:95]}"()
|
|
%vgpr3 = call <32 x i32> asm sideeffect "; def $0","=${v[96:127]}"()
|
|
br i1 %cc, label %bb1, label %bb2
|
|
|
|
bb1:
|
|
%tmp = getelementptr inbounds float, ptr addrspace(3) %arg, i32 1
|
|
%tmp2 = load float, ptr addrspace(3) %tmp, align 4
|
|
%tmp3 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 2
|
|
%tmp4 = load float, ptr addrspace(3) %tmp3, align 4
|
|
%tmp5 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 3
|
|
%tmp6 = load float, ptr addrspace(3) %tmp5, align 4
|
|
%tmp7 = tail call float @llvm.fmuladd.f32(float %tmp2, float %tmp4, float %tmp6)
|
|
%tmp8 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 5
|
|
%tmp9 = load float, ptr addrspace(3) %tmp8, align 4
|
|
%tmp10 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 6
|
|
%tmp11 = load float, ptr addrspace(3) %tmp10, align 4
|
|
%tmp12 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 7
|
|
%tmp13 = load float, ptr addrspace(3) %tmp12, align 4
|
|
%tmp14 = tail call float @llvm.fmuladd.f32(float %tmp9, float %tmp11, float %tmp13)
|
|
%tmp15 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 9
|
|
%tmp16 = load float, ptr addrspace(3) %tmp15, align 4
|
|
%tmp17 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 10
|
|
%tmp18 = load float, ptr addrspace(3) %tmp17, align 4
|
|
%tmp19 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 11
|
|
%tmp20 = load float, ptr addrspace(3) %tmp19, align 4
|
|
%tmp21 = tail call float @llvm.fmuladd.f32(float %tmp16, float %tmp18, float %tmp20)
|
|
%tmp22 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 13
|
|
%tmp23 = load float, ptr addrspace(3) %tmp22, align 4
|
|
%tmp24 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 14
|
|
%tmp25 = load float, ptr addrspace(3) %tmp24, align 4
|
|
%tmp26 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 15
|
|
%tmp27 = load float, ptr addrspace(3) %tmp26, align 4
|
|
%tmp28 = tail call float @llvm.fmuladd.f32(float %tmp23, float %tmp25, float %tmp27)
|
|
%tmp29 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 17
|
|
%tmp30 = load float, ptr addrspace(3) %tmp29, align 4
|
|
%tmp31 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 18
|
|
%tmp32 = load float, ptr addrspace(3) %tmp31, align 4
|
|
%tmp33 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 19
|
|
%tmp34 = load float, ptr addrspace(3) %tmp33, align 4
|
|
%tmp35 = tail call float @llvm.fmuladd.f32(float %tmp30, float %tmp32, float %tmp34)
|
|
%tmp36 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 21
|
|
%tmp37 = load float, ptr addrspace(3) %tmp36, align 4
|
|
%tmp38 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 22
|
|
%tmp39 = load float, ptr addrspace(3) %tmp38, align 4
|
|
%tmp40 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 23
|
|
%tmp41 = load float, ptr addrspace(3) %tmp40, align 4
|
|
%tmp42 = tail call float @llvm.fmuladd.f32(float %tmp37, float %tmp39, float %tmp41)
|
|
%tmp43 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 25
|
|
%tmp44 = load float, ptr addrspace(3) %tmp43, align 4
|
|
%tmp45 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 26
|
|
%tmp46 = load float, ptr addrspace(3) %tmp45, align 4
|
|
%tmp47 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 27
|
|
%tmp48 = load float, ptr addrspace(3) %tmp47, align 4
|
|
%tmp49 = tail call float @llvm.fmuladd.f32(float %tmp44, float %tmp46, float %tmp48)
|
|
%tmp50 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 29
|
|
%tmp51 = load float, ptr addrspace(3) %tmp50, align 4
|
|
%tmp52 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 30
|
|
%tmp53 = load float, ptr addrspace(3) %tmp52, align 4
|
|
%tmp54 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 31
|
|
%tmp55 = load float, ptr addrspace(3) %tmp54, align 4
|
|
%tmp56 = tail call float @llvm.fmuladd.f32(float %tmp51, float %tmp53, float %tmp55)
|
|
%tmp57 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 33
|
|
%tmp58 = load float, ptr addrspace(3) %tmp57, align 4
|
|
%tmp59 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 34
|
|
%tmp60 = load float, ptr addrspace(3) %tmp59, align 4
|
|
%tmp61 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 35
|
|
%tmp62 = load float, ptr addrspace(3) %tmp61, align 4
|
|
%tmp63 = tail call float @llvm.fmuladd.f32(float %tmp58, float %tmp60, float %tmp62)
|
|
%tmp64 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 37
|
|
%tmp65 = load float, ptr addrspace(3) %tmp64, align 4
|
|
%tmp66 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 38
|
|
%tmp67 = load float, ptr addrspace(3) %tmp66, align 4
|
|
%tmp68 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 39
|
|
%tmp69 = load float, ptr addrspace(3) %tmp68, align 4
|
|
%tmp70 = tail call float @llvm.fmuladd.f32(float %tmp65, float %tmp67, float %tmp69)
|
|
%tmp71 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 41
|
|
%tmp72 = load float, ptr addrspace(3) %tmp71, align 4
|
|
%tmp73 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 42
|
|
%tmp74 = load float, ptr addrspace(3) %tmp73, align 4
|
|
%tmp75 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 43
|
|
%tmp76 = load float, ptr addrspace(3) %tmp75, align 4
|
|
%tmp77 = tail call float @llvm.fmuladd.f32(float %tmp72, float %tmp74, float %tmp76)
|
|
%tmp78 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 45
|
|
%tmp79 = load float, ptr addrspace(3) %tmp78, align 4
|
|
%tmp80 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 46
|
|
%tmp81 = load float, ptr addrspace(3) %tmp80, align 4
|
|
%tmp82 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 47
|
|
%tmp83 = load float, ptr addrspace(3) %tmp82, align 4
|
|
%tmp84 = tail call float @llvm.fmuladd.f32(float %tmp79, float %tmp81, float %tmp83)
|
|
%tmp85 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 49
|
|
%tmp86 = load float, ptr addrspace(3) %tmp85, align 4
|
|
%tmp87 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 50
|
|
%tmp88 = load float, ptr addrspace(3) %tmp87, align 4
|
|
%tmp89 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 51
|
|
%tmp90 = load float, ptr addrspace(3) %tmp89, align 4
|
|
%tmp91 = tail call float @llvm.fmuladd.f32(float %tmp86, float %tmp88, float %tmp90)
|
|
%tmp92 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 53
|
|
%tmp93 = load float, ptr addrspace(3) %tmp92, align 4
|
|
%tmp94 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 54
|
|
%tmp95 = load float, ptr addrspace(3) %tmp94, align 4
|
|
%tmp96 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 55
|
|
%tmp97 = load float, ptr addrspace(3) %tmp96, align 4
|
|
%tmp98 = tail call float @llvm.fmuladd.f32(float %tmp93, float %tmp95, float %tmp97)
|
|
%tmp99 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 57
|
|
%tmp100 = load float, ptr addrspace(3) %tmp99, align 4
|
|
%tmp101 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 58
|
|
%tmp102 = load float, ptr addrspace(3) %tmp101, align 4
|
|
%tmp103 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 59
|
|
%tmp104 = load float, ptr addrspace(3) %tmp103, align 4
|
|
%tmp105 = tail call float @llvm.fmuladd.f32(float %tmp100, float %tmp102, float %tmp104)
|
|
%tmp106 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 61
|
|
%tmp107 = load float, ptr addrspace(3) %tmp106, align 4
|
|
%tmp108 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 62
|
|
%tmp109 = load float, ptr addrspace(3) %tmp108, align 4
|
|
%tmp110 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 63
|
|
%tmp111 = load float, ptr addrspace(3) %tmp110, align 4
|
|
%tmp112 = tail call float @llvm.fmuladd.f32(float %tmp107, float %tmp109, float %tmp111)
|
|
%tmp113 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 65
|
|
%tmp114 = load float, ptr addrspace(3) %tmp113, align 4
|
|
%tmp115 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 66
|
|
%tmp116 = load float, ptr addrspace(3) %tmp115, align 4
|
|
%tmp117 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 67
|
|
%tmp118 = load float, ptr addrspace(3) %tmp117, align 4
|
|
%tmp119 = tail call float @llvm.fmuladd.f32(float %tmp114, float %tmp116, float %tmp118)
|
|
%tmp120 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 69
|
|
%tmp121 = load float, ptr addrspace(3) %tmp120, align 4
|
|
%tmp122 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 70
|
|
%tmp123 = load float, ptr addrspace(3) %tmp122, align 4
|
|
%tmp124 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 71
|
|
%tmp125 = load float, ptr addrspace(3) %tmp124, align 4
|
|
%tmp126 = tail call float @llvm.fmuladd.f32(float %tmp121, float %tmp123, float %tmp125)
|
|
%tmp127 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 73
|
|
%tmp128 = load float, ptr addrspace(3) %tmp127, align 4
|
|
%tmp129 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 74
|
|
%tmp130 = load float, ptr addrspace(3) %tmp129, align 4
|
|
%tmp131 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 75
|
|
%tmp132 = load float, ptr addrspace(3) %tmp131, align 4
|
|
%tmp133 = tail call float @llvm.fmuladd.f32(float %tmp128, float %tmp130, float %tmp132)
|
|
%tmp134 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 77
|
|
%tmp135 = load float, ptr addrspace(3) %tmp134, align 4
|
|
%tmp136 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 78
|
|
%tmp137 = load float, ptr addrspace(3) %tmp136, align 4
|
|
%tmp138 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 79
|
|
%tmp139 = load float, ptr addrspace(3) %tmp138, align 4
|
|
%tmp140 = tail call float @llvm.fmuladd.f32(float %tmp135, float %tmp137, float %tmp139)
|
|
%tmp141 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 81
|
|
%tmp142 = load float, ptr addrspace(3) %tmp141, align 4
|
|
%tmp143 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 82
|
|
%tmp144 = load float, ptr addrspace(3) %tmp143, align 4
|
|
%tmp145 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 83
|
|
%tmp146 = load float, ptr addrspace(3) %tmp145, align 4
|
|
%tmp147 = tail call float @llvm.fmuladd.f32(float %tmp142, float %tmp144, float %tmp146)
|
|
%tmp148 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 85
|
|
%tmp149 = load float, ptr addrspace(3) %tmp148, align 4
|
|
%tmp150 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 86
|
|
%tmp151 = load float, ptr addrspace(3) %tmp150, align 4
|
|
%tmp152 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 87
|
|
%tmp153 = load float, ptr addrspace(3) %tmp152, align 4
|
|
%tmp154 = tail call float @llvm.fmuladd.f32(float %tmp149, float %tmp151, float %tmp153)
|
|
%tmp155 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 89
|
|
%tmp156 = load float, ptr addrspace(3) %tmp155, align 4
|
|
%tmp157 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 90
|
|
%tmp158 = load float, ptr addrspace(3) %tmp157, align 4
|
|
%tmp159 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 91
|
|
%tmp160 = load float, ptr addrspace(3) %tmp159, align 4
|
|
%tmp161 = tail call float @llvm.fmuladd.f32(float %tmp156, float %tmp158, float %tmp160)
|
|
%tmp162 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 93
|
|
%tmp163 = load float, ptr addrspace(3) %tmp162, align 4
|
|
%tmp164 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 94
|
|
%tmp165 = load float, ptr addrspace(3) %tmp164, align 4
|
|
%tmp166 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 95
|
|
%tmp167 = load float, ptr addrspace(3) %tmp166, align 4
|
|
%tmp168 = tail call float @llvm.fmuladd.f32(float %tmp163, float %tmp165, float %tmp167)
|
|
%tmp169 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 97
|
|
%tmp170 = load float, ptr addrspace(3) %tmp169, align 4
|
|
%tmp171 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 98
|
|
%tmp172 = load float, ptr addrspace(3) %tmp171, align 4
|
|
%tmp173 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 99
|
|
%tmp174 = load float, ptr addrspace(3) %tmp173, align 4
|
|
%tmp175 = tail call float @llvm.fmuladd.f32(float %tmp170, float %tmp172, float %tmp174)
|
|
%tmp176 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 101
|
|
%tmp177 = load float, ptr addrspace(3) %tmp176, align 4
|
|
%tmp178 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 102
|
|
%tmp179 = load float, ptr addrspace(3) %tmp178, align 4
|
|
%tmp180 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 103
|
|
%tmp181 = load float, ptr addrspace(3) %tmp180, align 4
|
|
%tmp182 = tail call float @llvm.fmuladd.f32(float %tmp177, float %tmp179, float %tmp181)
|
|
%tmp183 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 105
|
|
%tmp184 = load float, ptr addrspace(3) %tmp183, align 4
|
|
%tmp185 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 106
|
|
%tmp186 = load float, ptr addrspace(3) %tmp185, align 4
|
|
%tmp187 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 107
|
|
%tmp188 = load float, ptr addrspace(3) %tmp187, align 4
|
|
%tmp189 = tail call float @llvm.fmuladd.f32(float %tmp184, float %tmp186, float %tmp188)
|
|
%tmp190 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 109
|
|
%tmp191 = load float, ptr addrspace(3) %tmp190, align 4
|
|
%tmp192 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 110
|
|
%tmp193 = load float, ptr addrspace(3) %tmp192, align 4
|
|
%tmp194 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 111
|
|
%tmp195 = load float, ptr addrspace(3) %tmp194, align 4
|
|
%tmp196 = tail call float @llvm.fmuladd.f32(float %tmp191, float %tmp193, float %tmp195)
|
|
%tmp197 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 113
|
|
%tmp198 = load float, ptr addrspace(3) %tmp197, align 4
|
|
%tmp199 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 114
|
|
%tmp200 = load float, ptr addrspace(3) %tmp199, align 4
|
|
%tmp201 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 115
|
|
%tmp202 = load float, ptr addrspace(3) %tmp201, align 4
|
|
%tmp203 = tail call float @llvm.fmuladd.f32(float %tmp198, float %tmp200, float %tmp202)
|
|
%tmp204 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 117
|
|
%tmp205 = load float, ptr addrspace(3) %tmp204, align 4
|
|
%tmp206 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 118
|
|
%tmp207 = load float, ptr addrspace(3) %tmp206, align 4
|
|
%tmp208 = getelementptr inbounds float, ptr addrspace(3) %arg, i32 119
|
|
%tmp209 = load float, ptr addrspace(3) %tmp208, align 4
|
|
%tmp210 = tail call float @llvm.fmuladd.f32(float %tmp205, float %tmp207, float %tmp209)
|
|
store float %tmp7, ptr addrspace(1) %arg1, align 4
|
|
%tmp449 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 1
|
|
store float %tmp14, ptr addrspace(1) %tmp449, align 4
|
|
%tmp450 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 2
|
|
store float %tmp21, ptr addrspace(1) %tmp450, align 4
|
|
%tmp451 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 3
|
|
store float %tmp28, ptr addrspace(1) %tmp451, align 4
|
|
%tmp452 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 4
|
|
store float %tmp35, ptr addrspace(1) %tmp452, align 4
|
|
%tmp453 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 5
|
|
store float %tmp42, ptr addrspace(1) %tmp453, align 4
|
|
%tmp454 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 6
|
|
store float %tmp49, ptr addrspace(1) %tmp454, align 4
|
|
%tmp455 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 7
|
|
store float %tmp56, ptr addrspace(1) %tmp455, align 4
|
|
%tmp456 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 8
|
|
store float %tmp63, ptr addrspace(1) %tmp456, align 4
|
|
%tmp457 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 9
|
|
store float %tmp70, ptr addrspace(1) %tmp457, align 4
|
|
%tmp458 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 10
|
|
store float %tmp77, ptr addrspace(1) %tmp458, align 4
|
|
%tmp459 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 11
|
|
store float %tmp84, ptr addrspace(1) %tmp459, align 4
|
|
%tmp460 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 12
|
|
store float %tmp91, ptr addrspace(1) %tmp460, align 4
|
|
%tmp461 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 13
|
|
store float %tmp98, ptr addrspace(1) %tmp461, align 4
|
|
%tmp462 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 14
|
|
store float %tmp105, ptr addrspace(1) %tmp462, align 4
|
|
%tmp463 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 15
|
|
store float %tmp112, ptr addrspace(1) %tmp463, align 4
|
|
%tmp464 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 16
|
|
store float %tmp119, ptr addrspace(1) %tmp464, align 4
|
|
%tmp465 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 17
|
|
store float %tmp126, ptr addrspace(1) %tmp465, align 4
|
|
%tmp466 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 18
|
|
store float %tmp133, ptr addrspace(1) %tmp466, align 4
|
|
%tmp467 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 19
|
|
store float %tmp140, ptr addrspace(1) %tmp467, align 4
|
|
%tmp468 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 20
|
|
store float %tmp147, ptr addrspace(1) %tmp468, align 4
|
|
%tmp469 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 21
|
|
store float %tmp154, ptr addrspace(1) %tmp469, align 4
|
|
%tmp470 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 22
|
|
store float %tmp161, ptr addrspace(1) %tmp470, align 4
|
|
%tmp471 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 23
|
|
store float %tmp168, ptr addrspace(1) %tmp471, align 4
|
|
%tmp472 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 24
|
|
store float %tmp175, ptr addrspace(1) %tmp472, align 4
|
|
%tmp473 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 25
|
|
store float %tmp182, ptr addrspace(1) %tmp473, align 4
|
|
%tmp474 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 26
|
|
store float %tmp189, ptr addrspace(1) %tmp474, align 4
|
|
%tmp475 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 27
|
|
store float %tmp196, ptr addrspace(1) %tmp475, align 4
|
|
%tmp476 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 28
|
|
store float %tmp203, ptr addrspace(1) %tmp476, align 4
|
|
%tmp477 = getelementptr inbounds float, ptr addrspace(1) %arg1, i64 29
|
|
store float %tmp210, ptr addrspace(1) %tmp477, align 4
|
|
br i1 %cc, label %bb1, label %bb2
|
|
|
|
bb2:
|
|
call void asm sideeffect "; use $0","{v[0:31]}"(<32 x i32> %vgpr0)
|
|
call void asm sideeffect "; use $0","{v[0:31]}"(<32 x i32> %vgpr1)
|
|
call void asm sideeffect "; use $0","{v[0:31]}"(<32 x i32> %vgpr2)
|
|
call void asm sideeffect "; use $0","{v[0:31]}"(<32 x i32> %vgpr3)
|
|
ret void
|
|
}
|
|
|
|
declare float @llvm.fmuladd.f32(float, float, float)
|