static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
+ SDNode *FlagDestNode = Flag.getNode();
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- VTs.push_back(N->getValueType(i));
+ // Don't add a flag from a node to itself.
+ if (FlagDestNode == N) return;
+
+ // Don't add a flag to something which already has a flag.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
if (AddFlag)
VTs.push_back(MVT::Flag);
SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Ops.push_back(N->getOperand(i));
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
- if (Flag.getNode())
+ if (FlagDestNode)
Ops.push_back(Flag);
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
// Cluster loads by adding MVT::Flag outputs and inputs. This also
// ensures they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0,0), true, DAG);
+ AddFlags(Lead, SDValue(0, 0), true, DAG);
+
+ SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutFlag = I < E - 1;
+ SDNode *Load = Loads[I];
- SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
- for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
- bool OutFlag = i < e-1;
- SDNode *Load = Loads[i];
AddFlags(Load, InFlag, OutFlag, DAG);
if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues()-1);
+ InFlag = SDValue(Load, Load->getNumValues() - 1);
++LoadsClustered;
}
--- /dev/null
+; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -relocation-model=pic
+; PR7484
+
+%struct.gs_matrix = type { float, i32, float, i32, float, i32, float, i32, float, i32, float, i32 }
+
+; @func reads three fields of the %struct.gs_matrix pointed to by %pm1: the
+; floats at field indices 6 and 8, and field index 2 reinterpreted as an i32.
+; It branches on the i32 value; the two floats are only consumed in %bb11.
+; NOTE(review): presumably reduced from PR7484 to exercise clustering of
+; neighbouring loads in the scheduler -- confirm against the bug report.
+define fastcc void @func(%struct.gs_matrix* nocapture %pm1) nounwind {
+entry:
+; Two float loads from the same base pointer (field indices 6 and 8).
+ %0 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 6
+ %1 = load float* %0, align 4
+ %2 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 8
+ %3 = load float* %2, align 4
+; Third load from the same base: field index 2 is a float, read here through
+; an i32* bitcast so its bits can be tested with integer operations.
+ %4 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 2
+ %5 = bitcast float* %4 to i32*
+ %6 = load i32* %5, align 4
+; The 'or' with 0 leaves %6 unchanged; the mask 2147483647 (0x7fffffff) keeps
+; every bit except bit 31 before the comparison with zero.
+ %7 = or i32 0, %6
+ %.mask = and i32 %7, 2147483647
+ %8 = icmp eq i32 %.mask, 0
+ br i1 %8, label %bb, label %bb11
+
+bb:
+ ret void
+
+bb11:
+; The only uses of the two float loads; the fmul results themselves are unused.
+ %9 = fmul float %1, undef
+ %10 = fmul float %3, undef
+ ret void
+}