+/// TryToFoldLoadOpStore - Given a store node, try to fold a
+/// "load / integer binary op / store back to the same pointer" sequence into a
+/// single read-modify-write memory instruction of the form "[mem] op= val".
+///
+/// Node must be an ISD::STORE whose operands are (chain, stored value,
+/// pointer).  Returns true if the memory instruction was emitted, in which
+/// case the load's token chain has been recorded in LoweredTokens.  Returns
+/// false if the pattern does not match or there is no suitable opcode; every
+/// 'return false' happens before this function selects or emits anything, so
+/// the caller can fall back to ordinary store selection.
+bool ISel::TryToFoldLoadOpStore(SDNode *Node) {
+  assert(Node->getOpcode() == ISD::STORE && "Can only do this for stores!");
+  SDOperand Chain = Node->getOperand(0);
+  SDOperand StVal = Node->getOperand(1);
+  SDOperand StPtr = Node->getOperand(2);
+
+  // The stored value must be an integer binary operation with one use.
+  if (!StVal.Val->hasOneUse() || StVal.Val->getNumOperands() != 2 ||
+      MVT::isFloatingPoint(StVal.getValueType()))
+    return false;
+
+  // Token chain must either be a factor node or the load to fold.
+  if (Chain.getOpcode() != ISD::LOAD && Chain.getOpcode() != ISD::TokenFactor)
+    return false;
+
+  // Check to see if there is a load from the same pointer that we're storing
+  // to in either operand of the binop.  Operand 1 of a load is its pointer.
+  // Once TheLoad is chosen here it is known to read from StPtr, so no later
+  // re-check of the pointer is needed.
+  SDOperand TheLoad;
+  if (StVal.getOperand(0).getOpcode() == ISD::LOAD &&
+      StVal.getOperand(0).getOperand(1) == StPtr)
+    TheLoad = StVal.getOperand(0);
+  else if (StVal.getOperand(1).getOpcode() == ISD::LOAD &&
+           StVal.getOperand(1).getOperand(1) == StPtr)
+    TheLoad = StVal.getOperand(1);
+  else
+    return false;   // No matching load operand.
+
+  // We can only fold the load if there are no intervening side-effecting
+  // operations.  This means that the store uses the load as its token chain, or
+  // there are only token factor nodes in between the store and load.
+  if (Chain != TheLoad.getValue(1)) {
+    // Okay, the other option is that we have a store referring to (possibly
+    // nested) token factor nodes.  For now, just try peeking through one level
+    // of token factors to see if this is the case.
+    bool ChainOk = false;
+    if (Chain.getOpcode() == ISD::TokenFactor) {
+      for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
+        if (Chain.getOperand(i) == TheLoad.getValue(1)) {
+          ChainOk = true;
+          break;
+        }
+    }
+
+    if (!ChainOk) return false;
+  }
+
+  // Make sure that one of the operands of the binop is the load, and that the
+  // load folds into the binop.
+  if (((StVal.getOperand(0) != TheLoad ||
+        !isFoldableLoad(TheLoad, StVal.getOperand(1))) &&
+       (StVal.getOperand(1) != TheLoad ||
+        !isFoldableLoad(TheLoad, StVal.getOperand(0)))))
+    return false;
+
+  // Finally, check to see if this is one of the ops we can handle!
+  //
+  // Each table is indexed as:
+  //   [0..2]  memory op= immediate, for i8 / i16 / i32
+  //   [3..5]  memory op= register,  for i8 / i16 / i32
+  // An entry of 0 means that this form is not available from the table.
+  static const unsigned ADDTAB[] = {
+    X86::ADD8mi, X86::ADD16mi, X86::ADD32mi,
+    X86::ADD8mr, X86::ADD16mr, X86::ADD32mr,
+  };
+  static const unsigned SUBTAB[] = {
+    X86::SUB8mi, X86::SUB16mi, X86::SUB32mi,
+    X86::SUB8mr, X86::SUB16mr, X86::SUB32mr,
+  };
+  static const unsigned ANDTAB[] = {
+    X86::AND8mi, X86::AND16mi, X86::AND32mi,
+    X86::AND8mr, X86::AND16mr, X86::AND32mr,
+  };
+  static const unsigned ORTAB[] = {
+    X86::OR8mi, X86::OR16mi, X86::OR32mi,
+    X86::OR8mr, X86::OR16mr, X86::OR32mr,
+  };
+  static const unsigned XORTAB[] = {
+    X86::XOR8mi, X86::XOR16mi, X86::XOR32mi,
+    X86::XOR8mr, X86::XOR16mr, X86::XOR32mr,
+  };
+  static const unsigned SHLTAB[] = {
+    X86::SHL8mi, X86::SHL16mi, X86::SHL32mi,
+    /*Have to put the reg in CL*/0, 0, 0,
+  };
+  static const unsigned SARTAB[] = {
+    X86::SAR8mi, X86::SAR16mi, X86::SAR32mi,
+    /*Have to put the reg in CL*/0, 0, 0,
+  };
+  static const unsigned SHRTAB[] = {
+    X86::SHR8mi, X86::SHR16mi, X86::SHR32mi,
+    /*Have to put the reg in CL*/0, 0, 0,
+  };
+
+  const unsigned *TabPtr = 0;
+  switch (StVal.getOpcode()) {
+  default:
+    // Unknown binop: report it, then deliberately fall through to the
+    // "cannot fold" return shared with the known-unsupported opcodes below.
+    std::cerr << "CANNOT [mem] op= val: ";
+    StVal.Val->dump(); std::cerr << "\n";
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM: return false;
+
+  case ISD::ADD: TabPtr = ADDTAB; break;
+  case ISD::SUB: TabPtr = SUBTAB; break;
+  case ISD::AND: TabPtr = ANDTAB; break;
+  case ISD::OR:  TabPtr =  ORTAB; break;
+  case ISD::XOR: TabPtr = XORTAB; break;
+  case ISD::SHL: TabPtr = SHLTAB; break;
+  case ISD::SRA: TabPtr = SARTAB; break;
+  case ISD::SRL: TabPtr = SHRTAB; break;
+  }
+
+  // Handle: [mem] op= CST
+  SDOperand Op0 = StVal.getOperand(0);
+  SDOperand Op1 = StVal.getOperand(1);
+
+  // Opc must start out as 0: the value-type switch below leaves it untouched
+  // for types it does not handle, and it is tested right afterwards.
+  unsigned Opc = 0;
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+    switch (Op0.getValueType()) {   // Use Op0's type because of shifts.
+    default: break;
+    case MVT::i1:
+    case MVT::i8:  Opc = TabPtr[0]; break;
+    case MVT::i16: Opc = TabPtr[1]; break;
+    case MVT::i32: Opc = TabPtr[2]; break;
+    }
+
+    if (Opc) {
+      // Record the load's token chain as lowered before selecting the store's
+      // chain, since the load is being folded into the instruction we emit.
+      LoweredTokens.insert(TheLoad.getValue(1));
+      Select(Chain);
+
+      // Select the load's chain and its address, doing whichever has the
+      // higher register pressure first.
+      X86AddressMode AM;
+      if (getRegPressure(TheLoad.getOperand(0)) >
+          getRegPressure(TheLoad.getOperand(1))) {
+        Select(TheLoad.getOperand(0));
+        SelectAddress(TheLoad.getOperand(1), AM);
+      } else {
+        SelectAddress(TheLoad.getOperand(1), AM);
+        Select(TheLoad.getOperand(0));
+      }
+
+      if (StVal.getOpcode() == ISD::ADD) {
+        if (CN->getValue() == 1) {            // [X] += 1 -> INC [X]
+          switch (Op0.getValueType()) {
+          default: break;                     // e.g. i1: use the generic ADD.
+          case MVT::i8:
+            addFullAddress(BuildMI(BB, X86::INC8m, 4), AM);
+            return true;
+          case MVT::i16:
+            addFullAddress(BuildMI(BB, X86::INC16m, 4), AM);
+            return true;
+          case MVT::i32:
+            addFullAddress(BuildMI(BB, X86::INC32m, 4), AM);
+            return true;
+          }
+        } else if (CN->getValue()+1 == 0) {   // [X] += -1 -> DEC [X]
+          switch (Op0.getValueType()) {
+          default: break;                     // e.g. i1: use the generic ADD.
+          case MVT::i8:
+            addFullAddress(BuildMI(BB, X86::DEC8m, 4), AM);
+            return true;
+          case MVT::i16:
+            addFullAddress(BuildMI(BB, X86::DEC16m, 4), AM);
+            return true;
+          case MVT::i32:
+            addFullAddress(BuildMI(BB, X86::DEC32m, 4), AM);
+            return true;
+          }
+        }
+      }
+
+      // General case: memory operand followed by the immediate.
+      addFullAddress(BuildMI(BB, Opc, 4+1),AM).addImm(CN->getValue());
+      return true;
+    }
+  }
+
+  // If we have [mem] = V op [mem], try to turn it into:
+  //   [mem] = [mem] op V.
+  // SUB and the shifts are excluded because their operands cannot be swapped.
+  if (Op1 == TheLoad && StVal.getOpcode() != ISD::SUB &&
+      StVal.getOpcode() != ISD::SHL && StVal.getOpcode() != ISD::SRA &&
+      StVal.getOpcode() != ISD::SRL)
+    std::swap(Op0, Op1);
+
+  if (Op0 != TheLoad) return false;
+
+  switch (Op0.getValueType()) {
+  default: return false;
+  case MVT::i1:
+  case MVT::i8:  Opc = TabPtr[3]; break;
+  case MVT::i16: Opc = TabPtr[4]; break;
+  case MVT::i32: Opc = TabPtr[5]; break;
+  }
+
+  // Table entry doesn't exist?  This is the case for the shifts, whose
+  // register forms are 0 in the tables.  Bail out before anything is marked
+  // as lowered or selected so that no instruction with opcode 0 is emitted.
+  if (Opc == 0) return false;
+
+  LoweredTokens.insert(TheLoad.getValue(1));
+  Select(Chain);
+
+  Select(TheLoad.getOperand(0));
+  X86AddressMode AM;
+  SelectAddress(TheLoad.getOperand(1), AM);
+  unsigned Reg = SelectExpr(Op1);
+  addFullAddress(BuildMI(BB, Opc, 4+1),AM).addReg(Reg);
+  return true;
+}
+
+