From 1e3121c80a02359fda87cf77ce1fd7bbd5066991 Mon Sep 17 00:00:00 2001
From: Dan Gohman <gohman@apple.com>
Date: Sat, 19 Jun 2010 21:29:59 +0000
Subject: [PATCH] Include the use kind along with the expression in the key of
 the use sharing map.

The reconcileNewOffset logic already forces a separate use if the
kinds differ, so incorporating the kind in the key means we can track
more sharing opportunities. More sharing means fewer total uses to
track, which means smaller problem sizes, which means the conservative
throttles don't kick in as often.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106396 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Scalar/LoopStrengthReduce.cpp |  30 +-
 test/CodeGen/X86/lsr-reuse.ll                | 309 +++++++++++++++++++
 2 files changed, 337 insertions(+), 2 deletions(-)
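A rough standalone sketch of the keying change may help (illustrative
only: Expr, UseKind, and getUse below are stand-ins for LLVM's
const SCEV*, LSRUse::KindType, and LSRInstance::getUse, and
std::unordered_map stands in for DenseMap). Keying the sharing map by
an (expression, kind) pair lets fixups share a use record exactly when
both components match:

// sketch.cpp - illustrative only, not LLVM code.
#include <cstddef>
#include <cstdio>
#include <functional>
#include <unordered_map>
#include <utility>
#include <vector>

enum UseKind : unsigned { Basic, Special, Address, ICmpZero };
struct Expr {}; // stand-in for a uniqued SCEV expression

struct KeyHash {
  // XOR-combine the two hashes, as the patch's
  // UseMapDenseMapInfo::getHashValue does.
  size_t operator()(const std::pair<const Expr *, UseKind> &K) const {
    return std::hash<const Expr *>()(K.first) ^
           std::hash<unsigned>()(unsigned(K.second));
  }
};

// Map (expression, kind) -> index of a shared use record.
static std::unordered_map<std::pair<const Expr *, UseKind>, size_t, KeyHash>
    UseMap;
static std::vector<int> Uses; // stand-in for the table of LSRUses

static size_t getUse(const Expr *E, UseKind Kind) {
  auto P = UseMap.insert({{E, Kind}, Uses.size()});
  if (P.second)           // first time this (expr, kind) pair is seen:
    Uses.push_back(0);    // create a new use record
  return P.first->second; // otherwise share the existing record
}

int main() {
  Expr A;
  size_t U0 = getUse(&A, Address);  // new record: index 0
  size_t U1 = getUse(&A, Address);  // shared: index 0
  size_t U2 = getUse(&A, ICmpZero); // same expr, new kind: index 1
  std::printf("%zu %zu %zu\n", U0, U1, U2); // prints: 0 0 1
}

With the old expression-only key, whichever use first claimed an
expression owned its map entry, so a later fixup wanting the same
expression under a different kind could never find a compatible entry
through the map; with the pair key, each (expression, kind) combination
tracks its own shared use.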
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index c84e5a285b4..0d4fd2f4626 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1207,6 +1207,30 @@ static bool isAlwaysFoldable(const SCEV *S,
 
 namespace {
 
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+  static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+  }
+
+  static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+  }
+
+  static unsigned
+  getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+    unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+    Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+    return Result;
+  }
+
+  static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+                      const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+    return LHS == RHS;
+  }
+};
+
 /// FormulaSorter - This class implements an ordering for formulae which sorts
 /// the by their standalone cost.
 class FormulaSorter {
@@ -1279,7 +1303,9 @@ class LSRInstance {
   }
 
   // Support for sharing of LSRUses between LSRFixups.
-  typedef DenseMap<const SCEV *, size_t> UseMapTy;
+  typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+                   size_t,
+                   UseMapDenseMapInfo> UseMapTy;
   UseMapTy UseMap;
 
   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -1837,7 +1863,7 @@ LSRInstance::getUse(const SCEV *&Expr,
   }
 
   std::pair<UseMapTy::iterator, bool> P =
-    UseMap.insert(std::make_pair(Expr, 0));
+    UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
   if (!P.second) {
     // A use already existed with this base.
     size_t LUIdx = P.first->second;
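The four hooks in UseMapDenseMapInfo above follow DenseMap's traits
protocol: DenseMap is an open-addressed hash table that reserves two
key values for its own bookkeeping, one marking never-used slots and
one marking erased slots, which is why the patch manufactures the
impossible pointer values (const SCEV*)-1 and (const SCEV*)-2. A toy
fixed-size table (illustrative only; nothing below is LLVM code, and a
real table would also grow, rehash, and support erase) shows how such
hooks are consumed:

// toy_map.cpp - illustrative only, not LLVM code.
#include <array>
#include <cassert>
#include <cstdio>

struct IntInfo {
  // Two reserved keys that real entries may never use, analogous to
  // the (const SCEV*)-1 and -2 sentinels in UseMapDenseMapInfo.
  static int getEmptyKey() { return -1; }
  static int getTombstoneKey() { return -2; }
  static unsigned getHashValue(int K) { return unsigned(K) * 37u; }
  static bool isEqual(int A, int B) { return A == B; }
};

template <typename Info> class ToyMap {
  struct Slot { int Key; int Value; };
  std::array<Slot, 16> Slots; // fixed capacity; a sketch, not a container

public:
  ToyMap() {
    for (Slot &S : Slots)
      S.Key = Info::getEmptyKey(); // every slot starts out empty
  }

  int *lookup(int K) {
    // Linear probing: an empty slot ends the search; a tombstone does
    // not, since a matching key may live beyond it.
    for (unsigned I = Info::getHashValue(K);; ++I) {
      Slot &S = Slots[I % Slots.size()];
      if (Info::isEqual(S.Key, K))
        return &S.Value;
      if (Info::isEqual(S.Key, Info::getEmptyKey()))
        return nullptr;
    }
  }

  void insert(int K, int V) {
    assert(!Info::isEqual(K, Info::getEmptyKey()) &&
           !Info::isEqual(K, Info::getTombstoneKey()) &&
           "the reserved keys may never be inserted");
    if (int *Existing = lookup(K)) {
      *Existing = V;
      return;
    }
    // Claim the first empty or tombstone slot on the probe path.
    for (unsigned I = Info::getHashValue(K);; ++I) {
      Slot &S = Slots[I % Slots.size()];
      if (Info::isEqual(S.Key, Info::getEmptyKey()) ||
          Info::isEqual(S.Key, Info::getTombstoneKey())) {
        S.Key = K;
        S.Value = V;
        return;
      }
    }
  }
};

int main() {
  ToyMap<IntInfo> M;
  M.insert(5, 42);
  if (int *V = M.lookup(5))
    std::printf("%d\n", *V); // prints: 42
}

The only hard requirement on the sentinels is that no legitimate key
can ever compare equal to them, which holds for the patch because -1
and -2 are never valid SCEV pointer values.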
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index b80ee0897d8..3f4f9ec1d13 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -440,3 +440,312 @@ bb5: ; preds = %bb3, %entry
   %s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1]
   ret i32 %s.1.lcssa
 }
+
+; Two loops here are of particular interest: the one at %bb21, where
+; we don't want to leave extra induction variables around, or use an
+; lea to compute an exit condition inside the loop:
+
+; CHECK: test:
+
+; CHECK: BB10_4:
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addss %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulss (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movss %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT: addq $4, %r{{.*}}
+; CHECK-NEXT: decq %r{{.*}}
+; CHECK-NEXT: addq $4, %r{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: BB10_2:
+; CHECK-NEXT: testq %r{{.*}}, %r{{.*}}
+; CHECK-NEXT: jle
+; CHECK-NEXT: testb $15, %r{{.*}}
+; CHECK-NEXT: jne
+
+; And the one at %bb68, where we want to be sure to use superhero mode:
+
+; CHECK: BB10_10:
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps 48(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps 32(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps 16(%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: mulps (%r{{[^,]*}}), %xmm{{.*}}
+; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}})
+; CHECK-NEXT: movaps %xmm{{.*}}, 48(%r{{[^,]*}})
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
+; CHECK-NEXT: addq $64, %r{{.*}}
+; CHECK-NEXT: addq $64, %r{{.*}}
+; CHECK-NEXT: addq $-16, %r{{.*}}
+; CHECK-NEXT: BB10_11:
+; CHECK-NEXT: cmpq $15, %r{{.*}}
+; CHECK-NEXT: jg
+
+define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocapture %arg3, float* %arg4, i64 %arg5, i64 %arg6) nounwind {
+bb:
+  %t = alloca float, align 4 ; <float*> [#uses=3]
+  %t7 = alloca float, align 4 ; <float*> [#uses=2]
+  %t8 = load float* %arg3 ; <float> [#uses=8]
+  %t9 = ptrtoint float* %arg to i64 ; <i64> [#uses=1]
+  %t10 = ptrtoint float* %arg4 to i64 ; <i64> [#uses=1]
+  %t11 = xor i64 %t10, %t9 ; <i64> [#uses=1]
+  %t12 = and i64 %t11, 15 ; <i64> [#uses=1]
+  %t13 = icmp eq i64 %t12, 0 ; <i1> [#uses=1]
+  %t14 = xor i64 %arg1, 1 ; <i64> [#uses=1]
+  %t15 = xor i64 %arg5, 1 ; <i64> [#uses=1]
+  %t16 = or i64 %t15, %t14 ; <i64> [#uses=1]
+  %t17 = trunc i64 %t16 to i32 ; <i32> [#uses=1]
+  %t18 = icmp eq i32 %t17, 0 ; <i1> [#uses=1]
+  br i1 %t18, label %bb19, label %bb213
+
+bb19: ; preds = %bb
+  %t20 = load float* %arg2 ; <float> [#uses=1]
+  br label %bb21
+
+bb21: ; preds = %bb32, %bb19
+  %t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ] ; <i64> [#uses=21]
+  %t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6]
+  %t24 = sub i64 %arg6, %t22 ; <i64> [#uses=4]
+  %t25 = getelementptr float* %arg4, i64 %t22 ; <float*> [#uses=4]
+  %t26 = getelementptr float* %arg, i64 %t22 ; <float*> [#uses=3]
+  %t27 = icmp sgt i64 %t24, 0 ; <i1> [#uses=1]
+  br i1 %t27, label %bb28, label %bb37
+
+bb28: ; preds = %bb21
+  %t29 = ptrtoint float* %t25 to i64 ; <i64> [#uses=1]
+  %t30 = and i64 %t29, 15 ; <i64> [#uses=1]
+  %t31 = icmp eq i64 %t30, 0 ; <i1> [#uses=1]
+  br i1 %t31, label %bb37, label %bb32
+
+bb32: ; preds = %bb28
+  %t33 = load float* %t26 ; <float> [#uses=1]
+  %t34 = fmul float %t23, %t33 ; <float> [#uses=1]
+  store float %t34, float* %t25
+  %t35 = fadd float %t23, %t8 ; <float> [#uses=1]
+  %t36 = add i64 %t22, 1 ; <i64> [#uses=1]
+  br label %bb21
+
+bb37: ; preds = %bb28, %bb21
+  %t38 = fmul float %t8, 4.000000e+00 ; <float> [#uses=1]
+  store float %t38, float* %t
+  %t39 = fmul float %t8, 1.600000e+01 ; <float> [#uses=1]
+  store float %t39, float* %t7
+  %t40 = fmul float %t8, 0.000000e+00 ; <float> [#uses=1]
+  %t41 = fadd float %t23, %t40 ; <float> [#uses=1]
+  %t42 = insertelement <4 x float> undef, float %t41, i32 0 ; <<4 x float>> [#uses=1]
+  %t43 = fadd float %t23, %t8 ; <float> [#uses=1]
+  %t44 = insertelement <4 x float> %t42, float %t43, i32 1 ; <<4 x float>> [#uses=1]
+  %t45 = fmul float %t8, 2.000000e+00 ; <float> [#uses=1]
+  %t46 = fadd float %t23, %t45 ; <float> [#uses=1]
+  %t47 = insertelement <4 x float> %t44, float %t46, i32 2 ; <<4 x float>> [#uses=1]
+  %t48 = fmul float %t8, 3.000000e+00 ; <float> [#uses=1]
+  %t49 = fadd float %t23, %t48 ; <float> [#uses=1]
+  %t50 = insertelement <4 x float> %t47, float %t49, i32 3 ; <<4 x float>> [#uses=5]
+  %t51 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=3]
+  %t52 = fadd <4 x float> %t50, %t51 ; <<4 x float>> [#uses=3]
+  %t53 = fadd <4 x float> %t52, %t51 ; <<4 x float>> [#uses=3]
+  %t54 = fadd <4 x float> %t53, %t51 ; <<4 x float>> [#uses=2]
+  %t55 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t7) nounwind ; <<4 x float>> [#uses=8]
+  %t56 = icmp sgt i64 %t24, 15 ; <i1> [#uses=2]
+  br i1 %t13, label %bb57, label %bb118
+
+bb57: ; preds = %bb37
+  br i1 %t56, label %bb61, label %bb112
+
+bb58: ; preds = %bb68
+  %t59 = getelementptr float* %arg, i64 %t78 ; <float*> [#uses=1]
+  %t60 = getelementptr float* %arg4, i64 %t78 ; <float*> [#uses=1]
+  br label %bb112
+
+bb61: ; preds = %bb57
+  %t62 = add i64 %t22, 16 ; <i64> [#uses=1]
+  %t63 = add i64 %t22, 4 ; <i64> [#uses=1]
+  %t64 = add i64 %t22, 8 ; <i64> [#uses=1]
+  %t65 = add i64 %t22, 12 ; <i64> [#uses=1]
+  %t66 = add i64 %arg6, -16 ; <i64> [#uses=1]
+  %t67 = sub i64 %t66, %t22 ; <i64> [#uses=1]
+  br label %bb68
+
+bb68: ; preds = %bb68, %bb61
+  %t69 = phi i64 [ 0, %bb61 ], [ %t111, %bb68 ] ; <i64> [#uses=3]
+  %t70 = phi <4 x float> [ %t54, %bb61 ], [ %t107, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t71 = phi <4 x float> [ %t50, %bb61 ], [ %t103, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t72 = phi <4 x float> [ %t53, %bb61 ], [ %t108, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> [#uses=2]
+  %t74 = shl i64 %t69, 4 ; <i64> [#uses=5]
+  %t75 = add i64 %t22, %t74 ; <i64> [#uses=2]
+  %t76 = getelementptr float* %arg, i64 %t75 ; <float*> [#uses=1]
+  %t77 = bitcast float* %t76 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t78 = add i64 %t62, %t74 ; <i64> [#uses=2]
+  %t79 = add i64 %t63, %t74 ; <i64> [#uses=2]
+  %t80 = getelementptr float* %arg, i64 %t79 ; <float*> [#uses=1]
+  %t81 = bitcast float* %t80 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t82 = add i64 %t64, %t74 ; <i64> [#uses=2]
+  %t83 = getelementptr float* %arg, i64 %t82 ; <float*> [#uses=1]
+  %t84 = bitcast float* %t83 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t85 = add i64 %t65, %t74 ; <i64> [#uses=2]
+  %t86 = getelementptr float* %arg, i64 %t85 ; <float*> [#uses=1]
+  %t87 = bitcast float* %t86 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t88 = getelementptr float* %arg4, i64 %t75 ; <float*> [#uses=1]
+  %t89 = bitcast float* %t88 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t90 = getelementptr float* %arg4, i64 %t79 ; <float*> [#uses=1]
+  %t91 = bitcast float* %t90 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t92 = getelementptr float* %arg4, i64 %t82 ; <float*> [#uses=1]
+  %t93 = bitcast float* %t92 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t94 = getelementptr float* %arg4, i64 %t85 ; <float*> [#uses=1]
+  %t95 = bitcast float* %t94 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t96 = mul i64 %t69, -16 ; <i64> [#uses=1]
+  %t97 = add i64 %t67, %t96 ; <i64> [#uses=2]
+  %t98 = load <4 x float>* %t77 ; <<4 x float>> [#uses=1]
+  %t99 = load <4 x float>* %t81 ; <<4 x float>> [#uses=1]
+  %t100 = load <4 x float>* %t84 ; <<4 x float>> [#uses=1]
+  %t101 = load <4 x float>* %t87 ; <<4 x float>> [#uses=1]
+  %t102 = fmul <4 x float> %t98, %t71 ; <<4 x float>> [#uses=1]
+  %t103 = fadd <4 x float> %t71, %t55 ; <<4 x float>> [#uses=2]
+  %t104 = fmul <4 x float> %t99, %t73 ; <<4 x float>> [#uses=1]
+  %t105 = fmul <4 x float> %t100, %t72 ; <<4 x float>> [#uses=1]
+  %t106 = fmul <4 x float> %t101, %t70 ; <<4 x float>> [#uses=1]
+  store <4 x float> %t102, <4 x float>* %t89
+  store <4 x float> %t104, <4 x float>* %t91
+  store <4 x float> %t105, <4 x float>* %t93
+  store <4 x float> %t106, <4 x float>* %t95
+  %t107 = fadd <4 x float> %t70, %t55 ; <<4 x float>> [#uses=1]
+  %t108 = fadd <4 x float> %t72, %t55 ; <<4 x float>> [#uses=1]
+  %t109 = fadd <4 x float> %t73, %t55 ; <<4 x float>> [#uses=1]
+  %t110 = icmp sgt i64 %t97, 15 ; <i1> [#uses=1]
+  %t111 = add i64 %t69, 1 ; <i64> [#uses=1]
+  br i1 %t110, label %bb68, label %bb58
+
+bb112: ; preds = %bb58, %bb57
+  %t113 = phi float* [ %t59, %bb58 ], [ %t26, %bb57 ] ; <float*> [#uses=1]
+  %t114 = phi float* [ %t60, %bb58 ], [ %t25, %bb57 ] ; <float*> [#uses=1]
+  %t115 = phi <4 x float> [ %t103, %bb58 ], [ %t50, %bb57 ] ; <<4 x float>> [#uses=1]
+  %t116 = phi i64 [ %t97, %bb58 ], [ %t24, %bb57 ] ; <i64> [#uses=1]
+  %t117 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=0]
+  br label %bb194
+
+bb118: ; preds = %bb37
+  br i1 %t56, label %bb122, label %bb194
+
+bb119: ; preds = %bb137
+  %t120 = getelementptr float* %arg, i64 %t145 ; <float*> [#uses=1]
+  %t121 = getelementptr float* %arg4, i64 %t145 ; <float*> [#uses=1]
+  br label %bb194
+
+bb122: ; preds = %bb118
+  %t123 = add i64 %t22, -1 ; <i64> [#uses=1]
+  %t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1]
+  %t125 = bitcast float* %t124 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t126 = load <4 x float>* %t125 ; <<4 x float>> [#uses=1]
+  %t127 = add i64 %t22, 16 ; <i64> [#uses=1]
+  %t128 = add i64 %t22, 3 ; <i64> [#uses=1]
+  %t129 = add i64 %t22, 7 ; <i64> [#uses=1]
+  %t130 = add i64 %t22, 11 ; <i64> [#uses=1]
+  %t131 = add i64 %t22, 15 ; <i64> [#uses=1]
+  %t132 = add i64 %t22, 4 ; <i64> [#uses=1]
+  %t133 = add i64 %t22, 8 ; <i64> [#uses=1]
+  %t134 = add i64 %t22, 12 ; <i64> [#uses=1]
+  %t135 = add i64 %arg6, -16 ; <i64> [#uses=1]
+  %t136 = sub i64 %t135, %t22 ; <i64> [#uses=1]
+  br label %bb137
+
+bb137: ; preds = %bb137, %bb122
+  %t138 = phi i64 [ 0, %bb122 ], [ %t193, %bb137 ] ; <i64> [#uses=3]
+  %t139 = phi <4 x float> [ %t54, %bb122 ], [ %t189, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t140 = phi <4 x float> [ %t50, %bb122 ], [ %t185, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t141 = phi <4 x float> [ %t53, %bb122 ], [ %t190, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t142 = phi <4 x float> [ %t52, %bb122 ], [ %t191, %bb137 ] ; <<4 x float>> [#uses=2]
+  %t143 = phi <4 x float> [ %t126, %bb122 ], [ %t175, %bb137 ] ; <<4 x float>> [#uses=1]
+  %t144 = shl i64 %t138, 4 ; <i64> [#uses=9]
+  %t145 = add i64 %t127, %t144 ; <i64> [#uses=2]
+  %t146 = add i64 %t128, %t144 ; <i64> [#uses=1]
+  %t147 = getelementptr float* %arg, i64 %t146 ; <float*> [#uses=1]
+  %t148 = bitcast float* %t147 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t149 = add i64 %t129, %t144 ; <i64> [#uses=1]
+  %t150 = getelementptr float* %arg, i64 %t149 ; <float*> [#uses=1]
+  %t151 = bitcast float* %t150 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t152 = add i64 %t130, %t144 ; <i64> [#uses=1]
+  %t153 = getelementptr float* %arg, i64 %t152 ; <float*> [#uses=1]
+  %t154 = bitcast float* %t153 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t155 = add i64 %t131, %t144 ; <i64> [#uses=1]
+  %t156 = getelementptr float* %arg, i64 %t155 ; <float*> [#uses=1]
+  %t157 = bitcast float* %t156 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t158 = add i64 %t22, %t144 ; <i64> [#uses=1]
+  %t159 = getelementptr float* %arg4, i64 %t158 ; <float*> [#uses=1]
+  %t160 = bitcast float* %t159 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t161 = add i64 %t132, %t144 ; <i64> [#uses=1]
+  %t162 = getelementptr float* %arg4, i64 %t161 ; <float*> [#uses=1]
+  %t163 = bitcast float* %t162 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t164 = add i64 %t133, %t144 ; <i64> [#uses=1]
+  %t165 = getelementptr float* %arg4, i64 %t164 ; <float*> [#uses=1]
+  %t166 = bitcast float* %t165 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t167 = add i64 %t134, %t144 ; <i64> [#uses=1]
+  %t168 = getelementptr float* %arg4, i64 %t167 ; <float*> [#uses=1]
+  %t169 = bitcast float* %t168 to <4 x float>* ; <<4 x float>*> [#uses=1]
+  %t170 = mul i64 %t138, -16 ; <i64> [#uses=1]
+  %t171 = add i64 %t136, %t170 ; <i64> [#uses=2]
+  %t172 = load <4 x float>* %t148 ; <<4 x float>> [#uses=2]
+  %t173 = load <4 x float>* %t151 ; <<4 x float>> [#uses=2]
+  %t174 = load <4 x float>* %t154 ; <<4 x float>> [#uses=2]
+  %t175 = load <4 x float>* %t157 ; <<4 x float>> [#uses=2]
+  %t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t179 = shufflevector <4 x float> %t178, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t180 = shufflevector <4 x float> %t173, <4 x float> %t174, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t181 = shufflevector <4 x float> %t180, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t182 = shufflevector <4 x float> %t174, <4 x float> %t175, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
+  %t183 = shufflevector <4 x float> %t182, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
+  %t184 = fmul <4 x float> %t177, %t140 ; <<4 x float>> [#uses=1]
+  %t185 = fadd <4 x float> %t140, %t55 ; <<4 x float>> [#uses=2]
+  %t186 = fmul <4 x float> %t179, %t142 ; <<4 x float>> [#uses=1]
+  %t187 = fmul <4 x float> %t181, %t141 ; <<4 x float>> [#uses=1]
+  %t188 = fmul <4 x float> %t183, %t139 ; <<4 x float>> [#uses=1]
+  store <4 x float> %t184, <4 x float>* %t160
+  store <4 x float> %t186, <4 x float>* %t163
+  store <4 x float> %t187, <4 x float>* %t166
+  store <4 x float> %t188, <4 x float>* %t169
+  %t189 = fadd <4 x float> %t139, %t55 ; <<4 x float>> [#uses=1]
+  %t190 = fadd <4 x float> %t141, %t55 ; <<4 x float>> [#uses=1]
+  %t191 = fadd <4 x float> %t142, %t55 ; <<4 x float>> [#uses=1]
+  %t192 = icmp sgt i64 %t171, 15 ; <i1> [#uses=1]
+  %t193 = add i64 %t138, 1 ; <i64> [#uses=1]
+  br i1 %t192, label %bb137, label %bb119
+
+bb194: ; preds = %bb119, %bb118, %bb112
+  %t195 = phi i64 [ %t116, %bb112 ], [ %t171, %bb119 ], [ %t24, %bb118 ] ; <i64> [#uses=2]
+  %t196 = phi <4 x float> [ %t115, %bb112 ], [ %t185, %bb119 ], [ %t50, %bb118 ] ; <<4 x float>> [#uses=1]
+  %t197 = phi float* [ %t114, %bb112 ], [ %t121, %bb119 ], [ %t25, %bb118 ] ; <float*> [#uses=1]
+  %t198 = phi float* [ %t113, %bb112 ], [ %t120, %bb119 ], [ %t26, %bb118 ] ; <float*> [#uses=1]
+  %t199 = extractelement <4 x float> %t196, i32 0 ; <float> [#uses=2]
+  %t200 = icmp sgt i64 %t195, 0 ; <i1> [#uses=1]
+  br i1 %t200, label %bb201, label %bb211
+
+bb201: ; preds = %bb201, %bb194
+  %t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3]
+  %t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2]
+  %t204 = getelementptr float* %t198, i64 %t202 ; <float*> [#uses=1]
+  %t205 = getelementptr float* %t197, i64 %t202 ; <float*> [#uses=1]
+  %t206 = load float* %t204 ; <float> [#uses=1]
+  %t207 = fmul float %t203, %t206 ; <float> [#uses=1]
+  store float %t207, float* %t205
+  %t208 = fadd float %t203, %t8 ; <float> [#uses=2]
+  %t209 = add i64 %t202, 1 ; <i64> [#uses=2]
+  %t210 = icmp eq i64 %t209, %t195 ; <i1> [#uses=1]
+  br i1 %t210, label %bb211, label %bb201
+
+bb211: ; preds = %bb201, %bb194
+  %t212 = phi float [ %t199, %bb194 ], [ %t208, %bb201 ] ; <float> [#uses=1]
+  store float %t212, float* %arg2
+  ret void
+
+bb213: ; preds = %bb
+  ret void
+}
-- 
2.34.1