[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
+def SDTUnaryArithOvf : SDTypeProfile<1, 1,
+ [SDTCisInt<0>]>;
+def SDTBinaryArithOvf : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>]>;
+
def SDTX86BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
def SDTX86cas8 : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
+                                        SDTCisPtrTy<2>, SDTCisInt<3>, SDTCisInt<4>]>;
def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_X86CallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
-def SDT_X86Call : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>;
def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>;
+def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+
def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
[SDNPHasChain]>;
def X86cas8 : SDNode<"X86ISD::LCMPXCHG8_DAG", SDTX86cas8,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
SDNPMayLoad]>;
-
+def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
+def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
+ [SDNPHasChain, SDNPMayStore,
+ SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInFlag]>;
def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET,
[SDNPHasChain, SDNPOptInFlag]>;
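+
+// Arithmetic DAG nodes that also set EFLAGS, for selecting overflow-checked
+// arithmetic (hence the "_ovf" suffix). UMUL uses the unary profile,
+// presumably because its second operand is implicit (in AL/AX/EAX), as with
+// the MUL instructions below.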
+def X86add_ovf : SDNode<"X86ISD::ADD", SDTBinaryArithOvf>;
+def X86sub_ovf : SDNode<"X86ISD::SUB", SDTBinaryArithOvf>;
+def X86smul_ovf : SDNode<"X86ISD::SMUL", SDTBinaryArithOvf>;
+def X86umul_ovf : SDNode<"X86ISD::UMUL", SDTUnaryArithOvf>;
+
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
// X86 specific condition code. These correspond to CondCode in
// X86InstrInfo.h. They must be kept in sync.
-def X86_COND_A : PatLeaf<(i8 0)>;
-def X86_COND_AE : PatLeaf<(i8 1)>;
-def X86_COND_B : PatLeaf<(i8 2)>;
-def X86_COND_BE : PatLeaf<(i8 3)>;
-def X86_COND_E : PatLeaf<(i8 4)>;
-def X86_COND_G : PatLeaf<(i8 5)>;
-def X86_COND_GE : PatLeaf<(i8 6)>;
-def X86_COND_L : PatLeaf<(i8 7)>;
-def X86_COND_LE : PatLeaf<(i8 8)>;
-def X86_COND_NE : PatLeaf<(i8 9)>;
+def X86_COND_A : PatLeaf<(i8 0)>; // alt. COND_NBE
+def X86_COND_AE : PatLeaf<(i8 1)>; // alt. COND_NC
+def X86_COND_B : PatLeaf<(i8 2)>; // alt. COND_C
+def X86_COND_BE : PatLeaf<(i8 3)>; // alt. COND_NA
+def X86_COND_E : PatLeaf<(i8 4)>; // alt. COND_Z
+def X86_COND_G : PatLeaf<(i8 5)>; // alt. COND_NLE
+def X86_COND_GE : PatLeaf<(i8 6)>; // alt. COND_NL
+def X86_COND_L : PatLeaf<(i8 7)>; // alt. COND_NGE
+def X86_COND_LE : PatLeaf<(i8 8)>; // alt. COND_NG
+def X86_COND_NE : PatLeaf<(i8 9)>; // alt. COND_NZ
def X86_COND_NO : PatLeaf<(i8 10)>;
-def X86_COND_NP : PatLeaf<(i8 11)>;
+def X86_COND_NP : PatLeaf<(i8 11)>; // alt. COND_PO
def X86_COND_NS : PatLeaf<(i8 12)>;
def X86_COND_O : PatLeaf<(i8 13)>;
-def X86_COND_P : PatLeaf<(i8 14)>;
+def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
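+
+// For reference, a sketch of the C++ enum correspondence implied by the
+// encodings above (the authoritative definition lives in X86InstrInfo.h;
+// this is illustrative only):
+//
+//   namespace X86 {
+//     enum CondCode {
+//       COND_A  = 0,  COND_AE = 1,  COND_B  = 2,  COND_BE = 3,
+//       COND_E  = 4,  COND_G  = 5,  COND_GE = 6,  COND_L  = 7,
+//       COND_LE = 8,  COND_NE = 9,  COND_NO = 10, COND_NP = 11,
+//       COND_NS = 12, COND_O  = 13, COND_P  = 14, COND_S  = 15
+//     };
+//   }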
def i16immSExt8 : PatLeaf<(i16 imm), [{
// i16immSExt8 predicate - True if the 16-bit immediate fits in an 8-bit
// sign extended field.
- return (int16_t)N->getValue() == (int8_t)N->getValue();
+ return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue();
}]>;
def i32immSExt8 : PatLeaf<(i32 imm), [{
// i32immSExt8 predicate - True if the 32-bit immediate fits in an 8-bit
// sign extended field.
- return (int32_t)N->getValue() == (int8_t)N->getValue();
+ return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
}]>;
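+
+// A worked example of the check: an immediate fits in a sign-extended 8-bit
+// field iff truncating it to int8_t and widening back is lossless. So for
+// i32immSExt8, 127 passes, 128 fails ((int8_t)128 == -128), and -1 passes
+// even though getZExtValue() returns 0xFFFFFFFF -- hence the casts.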
// Helper fragments for loads.
// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
-def loadi16 : PatFrag<(ops node:$ptr), (i16 (ld node:$ptr)), [{
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- if (LD->getAddressingMode() != ISD::UNINDEXED)
- return false;
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD)
- return true;
- if (ExtType == ISD::EXTLOAD)
- return LD->getAlignment() >= 16 && !LD->isVolatile();
- }
+def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
return false;
}]>;
-def loadi32 : PatFrag<(ops node:$ptr), (i32 (ld node:$ptr)), [{
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- if (LD->getAddressingMode() != ISD::UNINDEXED)
- return false;
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD)
- return true;
- if (ExtType == ISD::EXTLOAD)
- return LD->getAlignment() >= 32 && !LD->isVolatile();
- }
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 2 && !LD->isVolatile();
+ return false;
+}]>;
+
+def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4 && !LD->isVolatile();
+ return false;
+}]>;
+
+def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->isVolatile())
+ return false;
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ return true;
+ if (ExtType == ISD::EXTLOAD)
+ return LD->getAlignment() >= 4;
+ return false;
+}]>;
+
+def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ const Value *Src = LD->getSrcValue();
+ if (!Src)
+ return false;
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 256;
return false;
}]>;
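+
+// gsload matches ordinary loads whose pointer operand lives in address
+// space 256, which this backend reserves for %gs-relative addressing (see
+// GS_MOV32rm below). IR along the lines of
+//   %v = load i32 addrspace(256)* %p
+// would be selected through this fragment.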
return N->hasOneUse();
}]>;
+// 'shld' and 'shrd' instruction patterns. Note that even though these have
+// the srl and shl in their patterns, the C++ code must still check for them,
+// because predicates are tested before child nodes are explored.
+
+def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (srl node:$src1, node:$amt1),
+ (shl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SRL &&
+ N->getOperand(1).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
+
+def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2),
+ (or (shl node:$src1, node:$amt1),
+ (srl node:$src2, node:$amt2)), [{
+ assert(N->getOpcode() == ISD::OR);
+ return N->getOperand(0).getOpcode() == ISD::SHL &&
+ N->getOperand(1).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) &&
+ isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) &&
+ N->getOperand(0).getConstantOperandVal(1) ==
+ N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1);
+}]>;
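+// Fragments like these are then consumed by selection patterns; a minimal
+// sketch, assuming the usual SHRD32rri8 instruction definition (not shown
+// here):
+//
+//   def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+//             (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;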
+
//===----------------------------------------------------------------------===//
// Instruction list...
//
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber EFLAGS.
let Defs = [ESP, EFLAGS], Uses = [ESP] in {
-def ADJCALLSTACKDOWN : I<0, Pseudo, (outs), (ins i32imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(X86callseq_start imm:$amt)]>;
-def ADJCALLSTACKUP : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
- "#ADJCALLSTACKUP",
- [(X86callseq_end imm:$amt1, imm:$amt2)]>;
+def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
+ "#ADJCALLSTACKDOWN",
+ [(X86callseq_start timm:$amt)]>,
+ Requires<[In32BitMode]>;
+def ADJCALLSTACKUP32 : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "#ADJCALLSTACKUP",
+ [(X86callseq_end timm:$amt1, timm:$amt2)]>,
+ Requires<[In32BitMode]>;
}
// Nop
def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
// PIC base
-let neverHasSideEffects = 1, isNotDuplicable = 1 in
+let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins piclabel:$label),
"call\t$label\n\tpop{l}\t$reg", []>;
hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
- [/*(X86retflag 0)*/ /*FIXME: Disabled: rdar://5791600*/]>;
+ [(X86retflag 0)]>;
def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
[(X86retflag imm:$amt)]>;
// Call Instructions...
//
let isCall = 1 in
- // All calls clobber the non-callee saved registers...
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, EFLAGS] in {
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
"call\t${dst:call}", []>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR32:$dst)]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
- "call\t{*}$dst", []>;
+ "call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
}
// Tail call stuff.
def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
}
-let isReMaterializable = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, imm:$src)]>;
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
-let isSimpleLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (load addr:$src))]>;
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
- [(set AL, (mul AL, GR8:$src))]>; // AL,AH = AL*GR8
+ [(set AL, (mul AL, GR8:$src)),
+ (implicit EFLAGS)]>; // AL,AH = AL*GR8
+
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
-def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", []>,
- OpSize; // AX,DX = AX*GR16
+def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*GR16
+
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
-def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>;
- // EAX,EDX = EAX*GR32
+def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*GR32
+
let Defs = [AL,AH,EFLAGS], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
"mul{b}\t$src",
// FIXME: Used for 8-bit mul, ignore result upper 8 bits.
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
- [(set AL, (mul AL, (loadi8 addr:$src)))]>; // AL,AH = AL*[mem8]
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+ "mul{w}\t$src",
+ []>, OpSize; // AX,DX = AX*[mem16]
+
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
+ "mul{l}\t$src",
+ []>; // EAX,EDX = EAX*[mem32]
}
let neverHasSideEffects = 1 in {
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
"imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
}
+} // neverHasSideEffects
// Unsigned division/remainder.
-let Defs = [AX,EFLAGS], Uses = [AL,AH] in
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
"div{l}\t$src", []>;
let mayLoad = 1 in {
-let Defs = [AX,EFLAGS], Uses = [AL,AH] in
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
"div{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
}
// Signed division/remainder.
-let Defs = [AX,EFLAGS], Uses = [AL,AH] in
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"idiv{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
"idiv{l}\t$src", []>;
let mayLoad = 1 in {
-let Defs = [AX,EFLAGS], Uses = [AL,AH] in
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
"idiv{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), // EDX:EAX/[mem32] = EAX,EDX
"idiv{l}\t$src", []>;
}
-} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Two address Instructions.
[(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
X86_COND_B, EFLAGS))]>,
TB;
-
def CMOVAE16rr: I<0x43, MRMSrcReg, // if >=u, GR16 = GR16
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"cmovae\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
X86_COND_NP, EFLAGS))]>,
TB;
-} // isCommutable = 1
-
-def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
- (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "cmovnp\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- X86_COND_NP, EFLAGS))]>,
+def CMOVO16rr : I<0x40, MRMSrcReg, // if overflow, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_O, EFLAGS))]>,
+ TB, OpSize;
+def CMOVO32rr : I<0x40, MRMSrcReg, // if overflow, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_O, EFLAGS))]>,
+ TB;
+def CMOVNO16rr : I<0x41, MRMSrcReg, // if !overflow, GR16 = GR16
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2,
+ X86_COND_NO, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNO32rr : I<0x41, MRMSrcReg, // if !overflow, GR32 = GR32
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, GR32:$src2,
+ X86_COND_NO, EFLAGS))]>,
TB;
+} // isCommutable = 1
def CMOVB16rm : I<0x42, MRMSrcMem, // if <u, GR16 = [mem16]
(outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
X86_COND_NP, EFLAGS))]>,
TB, OpSize;
+def CMOVNP32rm : I<0x4B, MRMSrcMem, // if !parity, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovnp\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NP, EFLAGS))]>,
+ TB;
+def CMOVO16rm : I<0x40, MRMSrcMem, // if overflow, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_O, EFLAGS))]>,
+ TB, OpSize;
+def CMOVO32rm : I<0x40, MRMSrcMem, // if overflow, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovo\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_O, EFLAGS))]>,
+ TB;
+def CMOVNO16rm : I<0x41, MRMSrcMem, // if !overflow, GR16 = [mem16]
+ (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
+ X86_COND_NO, EFLAGS))]>,
+ TB, OpSize;
+def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
+ (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
+ "cmovno\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
+ X86_COND_NO, EFLAGS))]>,
+ TB;
} // Uses = [EFLAGS]
}
} // Defs = [EFLAGS]
+// Match xor -1 to not. Favor these over a move imm + xor to save code size.
+let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
[(set GR8:$dst, (not GR8:$src))]>;
def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
[(set GR16:$dst, (not GR16:$src))]>, OpSize;
def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
[(set GR32:$dst, (not GR32:$src))]>;
+}
let isTwoAddress = 0 in {
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)]>;
def OR32mi8 : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$src),
"or{l}\t{$src, $dst|$dst, $src}",
[(store (or (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
-}
+} // isTwoAddress = 0
-let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
-def XOR8rr : I<0x30, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, GR8:$src2))]>;
-def XOR16rr : I<0x31, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, GR16:$src2))]>, OpSize;
-def XOR32rr : I<0x31, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
-}
+let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
+ def XOR8rr : I<0x30, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (xor GR8:$src1, GR8:$src2))]>;
+ def XOR16rr : I<0x31, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (xor GR16:$src1, GR16:$src2))]>, OpSize;
+ def XOR32rr : I<0x31, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "xor{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
+} // isCommutable = 1
def XOR8rm : I<0x32, MRMSrcMem ,
(outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
def XOR16rm : I<0x33, MRMSrcMem ,
(outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2)))]>, OpSize;
+ [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2)))]>,
+ OpSize;
def XOR32rm : I<0x33, MRMSrcMem ,
(outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2))]>;
+
let isTwoAddress = 0 in {
def XOR8mr : I<0x30, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
(outs), (ins i32mem:$dst, i32i8imm :$src),
"xor{l}\t{$src, $dst|$dst, $src}",
[(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst)]>;
-}
+} // isTwoAddress = 0
} // Defs = [EFLAGS]
// Shift instructions
def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
"shl{l}\t{%cl, $dst|$dst, %CL}",
[(set GR32:$dst, (shl GR32:$src, CL))]>;
-}
+} // Uses = [CL]
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
// NOTE: We don't use shifts of a register by one, because 'add reg,reg' is
// cheaper.
-}
+} // isConvertibleToThreeAddress = 1
let isTwoAddress = 0 in {
let Uses = [CL] in {
// Arithmetic.
let Defs = [EFLAGS] in {
let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
-def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
- (ins GR8 :$src1, GR8 :$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, GR8:$src2))]>;
+// Register-Register Addition
+def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
+ (ins GR8 :$src1, GR8 :$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+// Register-Register Addition
def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, GR16:$src2))]>, OpSize;
+ [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>;
+ [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
} // end isConvertibleToThreeAddress
} // end isCommutable
+
+// Register-Memory Addition
def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, (load addr:$src2)))]>;
+ [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, (load addr:$src2)))]>,OpSize;
+ [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, (load addr:$src2)))]>;
+ [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
-def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, imm:$src2))]>;
+// Register-Integer Addition
+def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "add{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
+// Register-Integer Addition
def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
(ins GR16:$src1, i16imm:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, imm:$src2))]>, OpSize;
+ [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
(ins GR32:$src1, i32imm:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, imm:$src2))]>;
+ [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
(ins GR16:$src1, i16i8imm:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2))]>, OpSize;
+ [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
(ins GR32:$src1, i32i8imm:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2))]>;
+ [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
}
let isTwoAddress = 0 in {
- def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
+ // Memory-Register Addition
+ def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ [(store (add (load addr:$dst), GR8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
def ADD16mr : I<0x01, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
+ [(store (add (load addr:$dst), GR16:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
def ADD32mr : I<0x01, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), GR32:$src2), addr:$dst)]>;
+ [(store (add (load addr:$dst), GR32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
def ADD8mi : Ii8<0x80, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ [(store (add (loadi8 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
def ADD16mi : Ii16<0x81, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
+ [(store (add (loadi16 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
def ADD32mi : Ii32<0x81, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ [(store (add (loadi32 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
def ADD16mi8 : Ii8<0x83, MRM0m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
+ [(store (add (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
def ADD32mi8 : Ii8<0x83, MRM0m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(store (add (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+ [(store (add (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
}
let Uses = [EFLAGS] in {
[(store (adde (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
def ADC32mi8 : Ii8<0x83, MRM2m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
"adc{l}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+ [(store (adde (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
}
} // Uses = [EFLAGS]
-def SUB8rr : I<0x28, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, GR8:$src2))]>;
-def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, GR16:$src2))]>, OpSize;
-def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
-def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
- "sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2)))]>;
-def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
- "sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2)))]>, OpSize;
-def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
- "sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2)))]>;
-
-def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+// Register-Register Subtraction
+def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>;
+
+// Register-Memory Subtraction
+def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
+ (ins GR8 :$src1, i8mem :$src2),
+ "sub{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
+ "sub{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$src1, i32mem:$src2),
+ "sub{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>;
+
+// Register-Integer Subtraction
+def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
+ (ins GR8:$src1, i8imm:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, imm:$src2))]>;
-def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
+ (ins GR16:$src1, i16imm:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, imm:$src2))]>, OpSize;
-def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
+ [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
+ (ins GR32:$src1, i32imm:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, imm:$src2))]>;
-def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
+ [(set GR32:$dst, (sub GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
+def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
+ (ins GR16:$src1, i16i8imm:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
-def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
+ [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
+def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
+ (ins GR32:$src1, i32i8imm:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2))]>;
+ [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+
let isTwoAddress = 0 in {
+ // Memory-Register Subtraction
def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR8:$src2), addr:$dst)]>;
+ [(store (sub (load addr:$dst), GR8:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
def SUB16mr : I<0x29, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR16:$src2), addr:$dst)]>,
- OpSize;
+ [(store (sub (load addr:$dst), GR16:$src2), addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
def SUB32mr : I<0x29, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), GR32:$src2), addr:$dst)]>;
+ [(store (sub (load addr:$dst), GR32:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
+
+ // Memory-Integer Subtraction
def SUB8mi : Ii8<0x80, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
+ [(store (sub (loadi8 addr:$dst), imm:$src2), addr:$dst),
+ (implicit EFLAGS)]>;
def SUB16mi : Ii16<0x81, MRM5m, (outs), (ins i16mem:$dst, i16imm:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi16 addr:$dst), imm:$src2), addr:$dst)]>,
- OpSize;
+ [(store (sub (loadi16 addr:$dst), imm:$src2),addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
def SUB32mi : Ii32<0x81, MRM5m, (outs), (ins i32mem:$dst, i32imm:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (loadi32 addr:$dst), imm:$src2), addr:$dst)]>;
+ [(store (sub (loadi32 addr:$dst), imm:$src2),addr:$dst),
+ (implicit EFLAGS)]>;
def SUB16mi8 : Ii8<0x83, MRM5m, (outs), (ins i16mem:$dst, i16i8imm :$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i16immSExt8:$src2), addr:$dst)]>,
- OpSize;
- def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
+ [(store (sub (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>, OpSize;
+ def SUB32mi8 : Ii8<0x83, MRM5m, (outs), (ins i32mem:$dst, i32i8imm :$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(store (sub (load addr:$dst), i32immSExt8:$src2), addr:$dst)]>;
+ [(store (sub (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)]>;
}
let Uses = [EFLAGS] in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+// Register-Register Signed Integer Multiply
+def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, GR16:$src2))]>, TB, OpSize;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)),
+ (implicit EFLAGS)]>, TB, OpSize;
+def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>, TB;
+ [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)),
+ (implicit EFLAGS)]>, TB;
}
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2)))]>,
- TB, OpSize;
+ [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB, OpSize;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2)))]>, TB;
+ [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))),
+ (implicit EFLAGS)]>, TB;
} // Defs = [EFLAGS]
} // end Two Address instructions
// Surprisingly enough, these are not two-address instructions!
let Defs = [EFLAGS] in {
+// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, imm:$src2))]>, OpSize;
+ [(set GR16:$dst, (mul GR16:$src1, imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>;
+ [(set GR32:$dst, (mul GR32:$src1, imm:$src2)),
+ (implicit EFLAGS)]>;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2))]>,
- OpSize;
+ [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2))]>;
+ [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
+// Memory-Integer Signed Integer Multiply
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul (load addr:$src1), imm:$src2))]>,
- OpSize;
+ [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)),
+ (implicit EFLAGS)]>, OpSize;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul (load addr:$src1), imm:$src2))]>;
+ [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)),
+ (implicit EFLAGS)]>;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul (load addr:$src1), i16immSExt8:$src2))]>,
- OpSize;
+ [(set GR16:$dst, (mul (load addr:$src1),
+ i16immSExt8:$src2)),
+ (implicit EFLAGS)]>, OpSize;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul (load addr:$src1), i32immSExt8:$src2))]>;
+ [(set GR32:$dst, (mul (load addr:$src1),
+ i32immSExt8:$src2)),
+ (implicit EFLAGS)]>;
} // Defs = [EFLAGS]
//===----------------------------------------------------------------------===//
"sete\t$dst",
[(store (X86setcc X86_COND_E, EFLAGS), addr:$dst)]>,
TB; // [mem8] = ==
+
def SETNEr : I<0x95, MRM0r,
(outs GR8 :$dst), (ins),
"setne\t$dst",
"setne\t$dst",
[(store (X86setcc X86_COND_NE, EFLAGS), addr:$dst)]>,
TB; // [mem8] = !=
+
def SETLr : I<0x9C, MRM0r,
(outs GR8 :$dst), (ins),
"setl\t$dst",
"setl\t$dst",
[(store (X86setcc X86_COND_L, EFLAGS), addr:$dst)]>,
TB; // [mem8] = < signed
+
def SETGEr : I<0x9D, MRM0r,
(outs GR8 :$dst), (ins),
"setge\t$dst",
"setge\t$dst",
[(store (X86setcc X86_COND_GE, EFLAGS), addr:$dst)]>,
TB; // [mem8] = >= signed
+
def SETLEr : I<0x9E, MRM0r,
(outs GR8 :$dst), (ins),
"setle\t$dst",
"setle\t$dst",
[(store (X86setcc X86_COND_LE, EFLAGS), addr:$dst)]>,
TB; // [mem8] = <= signed
+
def SETGr : I<0x9F, MRM0r,
(outs GR8 :$dst), (ins),
"setg\t$dst",
"setb\t$dst",
[(store (X86setcc X86_COND_B, EFLAGS), addr:$dst)]>,
TB; // [mem8] = < unsign
+
def SETAEr : I<0x93, MRM0r,
(outs GR8 :$dst), (ins),
"setae\t$dst",
"setae\t$dst",
[(store (X86setcc X86_COND_AE, EFLAGS), addr:$dst)]>,
TB; // [mem8] = >= unsign
+
def SETBEr : I<0x96, MRM0r,
(outs GR8 :$dst), (ins),
"setbe\t$dst",
"setbe\t$dst",
[(store (X86setcc X86_COND_BE, EFLAGS), addr:$dst)]>,
TB; // [mem8] = <= unsign
+
def SETAr : I<0x97, MRM0r,
(outs GR8 :$dst), (ins),
"seta\t$dst",
"setns\t$dst",
[(store (X86setcc X86_COND_NS, EFLAGS), addr:$dst)]>,
TB; // [mem8] = !<sign bit>
+
def SETPr : I<0x9A, MRM0r,
(outs GR8 :$dst), (ins),
"setp\t$dst",
"setnp\t$dst",
[(store (X86setcc X86_COND_NP, EFLAGS), addr:$dst)]>,
TB; // [mem8] = not parity
+
+def SETOr : I<0x90, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "seto\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_O, EFLAGS))]>,
+ TB; // GR8 = overflow
+def SETOm : I<0x90, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "seto\t$dst",
+ [(store (X86setcc X86_COND_O, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = overflow
+def SETNOr : I<0x91, MRM0r,
+ (outs GR8 :$dst), (ins),
+ "setno\t$dst",
+ [(set GR8:$dst, (X86setcc X86_COND_NO, EFLAGS))]>,
+ TB; // GR8 = not overflow
+def SETNOm : I<0x91, MRM0m,
+ (outs), (ins i8mem:$dst),
+ "setno\t$dst",
+ [(store (X86setcc X86_COND_NO, EFLAGS), addr:$dst)]>,
+ TB; // [mem8] = not overflow
} // Uses = [EFLAGS]
(implicit EFLAGS)]>;
} // Defs = [EFLAGS]
+// Bit tests.
+// TODO: BTC, BTR, and BTS
+let Defs = [EFLAGS] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)]>, TB;
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Disable these instructions for now.
+//def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+// "bt{w}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi16 addr:$src1), GR16:$src2),
+// (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
+//def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+// "bt{l}\t{$src2, $src1|$src1, $src2}",
+// [(X86bt (loadi32 addr:$src1), GR32:$src2),
+// (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
+
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+// Note that these instructions don't need FastBTMem because that
+// only applies when the other operand is in a register. When it's
+// an immediate, bt is still fast.
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)]>, OpSize, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)]>, TB;
+} // Defs = [EFLAGS]
+
// Sign/Zero extenders
+// Use movsbl instead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (sext GR8:$src))]>, TB, OpSize;
+ "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (sext GR8:$src))]>, TB;
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB, OpSize;
+ "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sext GR8:$src))]>, TB;
"movs{wl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+// Use movzbl instead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (zext GR8:$src))]>, TB, OpSize;
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (zext GR8:$src))]>, TB;
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB, OpSize;
+ "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+ [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (zext GR8:$src))]>, TB;
// Alias instructions that map movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
-let Defs = [EFLAGS], isReMaterializable = 1 in {
+let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
"xor{b}\t$dst, $dst",
[(set GR8:$dst, 0)]>;
+// Use xorl instead of xorw since we don't care about the high 16 bits,
+// it's smaller, and it avoids a partial-register update.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
- "xor{w}\t$dst, $dst",
- [(set GR16:$dst, 0)]>, OpSize;
+ "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+ [(set GR16:$dst, 0)]>;
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
"xor{l}\t$dst, $dst",
[(set GR32:$dst, 0)]>;
"mov{l}\t{$src, $dst|$dst, $src}", []>;
} // neverHasSideEffects
-let isSimpleLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
def MOV16_rm : I<0x8B, MRMSrcMem, (outs GR16_:$dst), (ins i16mem:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32_rm : I<0x8B, MRMSrcMem, (outs GR32_:$dst), (ins i32mem:$src),
[(set GR32:$dst, (load (add X86TLStp, GR32:$src)))]>;
let AddedComplexity = 15 in
-def TLS_gs_ri : I<0, Pseudo, (outs GR32:$dst), (ins i32imm:$src),
+def TLS_gs_ri : I<0x8B, Pseudo, (outs GR32:$dst), (ins i32imm:$src),
"movl\t%gs:${src:mem}, $dst",
[(set GR32:$dst,
- (load (add X86TLStp, (X86Wrapper tglobaltlsaddr:$src))))]>;
+ (load (add X86TLStp, (X86Wrapper tglobaltlsaddr:$src))))]>,
+ SegGS;
-def TLS_tp : I<0, Pseudo, (outs GR32:$dst), (ins),
+def TLS_tp : I<0x8B, Pseudo, (outs GR32:$dst), (ins),
"movl\t%gs:0, $dst",
- [(set GR32:$dst, X86TLStp)]>;
+ [(set GR32:$dst, X86TLStp)]>, SegGS;
+
+let AddedComplexity = 5 in
+def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movl\t%gs:$src, $dst",
+ [(set GR32:$dst, (gsload addr:$src))]>, SegGS;
//===----------------------------------------------------------------------===//
// DWARF Pseudo Instructions
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
-let Constraints = "$val = $dst", Defs = [EFLAGS] in {
+let Constraints = "$val = $dst" in {
def XCHG32rm : I<0x87, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
"xchg{l}\t{$val, $ptr|$ptr, $val}",
[(set GR32:$dst, (atomic_swap_32 addr:$ptr, GR32:$val))]>;
// Atomic compare and swap.
let Defs = [EAX, EFLAGS], Uses = [EAX] in {
def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
- "lock cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
+ "lock\n\tcmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
[(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
}
-let Defs = [EAX, EBX, ECX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRMDestMem, (outs), (ins i32mem:$ptr),
- "lock cmpxchg8b\t$ptr",
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr),
+ "lock\n\tcmpxchg8b\t$ptr",
[(X86cas8 addr:$ptr)]>, TB, LOCK;
}
let Defs = [AX, EFLAGS], Uses = [AX] in {
def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
- "lock cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
+ "lock\n\tcmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
[(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
}
let Defs = [AL, EFLAGS], Uses = [AL] in {
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
- "lock cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
+ "lock\n\tcmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
[(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
}
// Atomic exchange and add
let Constraints = "$val = $dst", Defs = [EFLAGS] in {
def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
- "lock xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_las_32 addr:$ptr, GR32:$val))]>,
+ "lock\n\txadd{l}\t{$val, $ptr|$ptr, $val}",
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
TB, LOCK;
def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
- "lock xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_las_16 addr:$ptr, GR16:$val))]>,
+ "lock\n\txadd{w}\t{$val, $ptr|$ptr, $val}",
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
TB, OpSize, LOCK;
def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins i8mem:$ptr, GR8:$val),
- "lock xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_las_8 addr:$ptr, GR8:$val))]>,
+ "lock\n\txadd{b}\t{$val, $ptr|$ptr, $val}",
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
TB, LOCK;
}
-// Atomic exchange and and, or, xor
+// Atomic and, or, xor, nand, min, max, umin, and umax pseudo instructions.
let Constraints = "$val = $dst", Defs = [EFLAGS],
usesCustomDAGSchedInserter = 1 in {
def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMAND32 PSUEDO!",
- [(set GR32:$dst, (atomic_load_and addr:$ptr, GR32:$val))]>;
-}
-
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ "#ATOMAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>;
def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMOR32 PSUEDO!",
- [(set GR32:$dst, (atomic_load_or addr:$ptr, GR32:$val))]>;
-}
-
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ "#ATOMOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>;
def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMXOR32 PSUEDO!",
- [(set GR32:$dst, (atomic_load_xor addr:$ptr, GR32:$val))]>;
-}
-
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ "#ATOMXOR32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>;
+def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
+ "#ATOMNAND32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>;
def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val),
- "#ATOMMIN32 PSUEDO!",
- [(set GR32:$dst, (atomic_load_min addr:$ptr, GR32:$val))]>;
-}
-
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ "#ATOMMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>;
def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMMAX32 PSUEDO!",
- [(set GR32:$dst, (atomic_load_max addr:$ptr, GR32:$val))]>;
-}
-
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ "#ATOMMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>;
def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMIN32 PSUEDO!",
- [(set GR32:$dst, (atomic_load_umin addr:$ptr, GR32:$val))]>;
-}
-
-let Constraints = "$val = $dst", Defs = [EFLAGS],
- usesCustomDAGSchedInserter = 1 in {
+ "#ATOMUMIN32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>;
def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val),
- "#ATOMUMAX32 PSUEDO!",
- [(set GR32:$dst, (atomic_load_umax addr:$ptr, GR32:$val))]>;
+ "#ATOMUMAX32 PSEUDO!",
+ [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>;
+
+def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>;
+def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>;
+def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMXOR16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>;
+def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMNAND16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>;
+def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val),
+ "#ATOMMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>;
+def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMIN16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>;
+def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val),
+ "#ATOMUMAX16 PSEUDO!",
+ [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>;
+
+def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>;
+def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>;
+def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMXOR8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>;
+def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val),
+ "#ATOMNAND8 PSEUDO!",
+ [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>;
+}
+
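+// 64-bit atomic read-modify-write pseudos for 32-bit targets. The value is
+// carried in two GR32 halves, and the custom inserter expands each pseudo,
+// presumably into a cmpxchg8b retry loop (hence the EAX/EBX/ECX/EDX defs
+// and uses).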
+let Constraints = "$val1 = $dst1, $val2 = $dst2",
+ Defs = [EFLAGS, EAX, EBX, ECX, EDX],
+ Uses = [EAX, EBX, ECX, EDX],
+ mayLoad = 1, mayStore = 1,
+ usesCustomDAGSchedInserter = 1 in {
+def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMAND6432 PSEUDO!", []>;
+def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMOR6432 PSEUDO!", []>;
+def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMXOR6432 PSEUDO!", []>;
+def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMNAND6432 PSEUDO!", []>;
+def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMADD6432 PSEUDO!", []>;
+def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSUB6432 PSEUDO!", []>;
+def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
+ (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
+ "#ATOMSWAP6432 PSEUDO!", []>;
}
//===----------------------------------------------------------------------===//
def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)),
(TEST32rr GR32:$src1, GR32:$src1)>;
+// Conditional moves with folded loads with operands swapped and conditions
+// inverted.
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
+ (CMOVAE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
+ (CMOVB32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
+ (CMOVNE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
+ (CMOVE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
+ (CMOVA32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
+ (CMOVBE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
+ (CMOVGE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
+ (CMOVL32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
+ (CMOVG32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
+ (CMOVLE32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
+ (CMOVNP32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
+ (CMOVP32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
+ (CMOVNS32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
+ (CMOVS32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
+ (CMOVNO32rm GR32:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO16rm GR16:$src2, addr:$src1)>;
+def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
+ (CMOVO32rm GR32:$src2, addr:$src1)>;
+
// zextload bool -> zextload byte
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
// extload bool -> extload byte
def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
+ Requires<[In32BitMode]>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
+ Requires<[In32BitMode]>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
-// anyext -> zext
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
-def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
-def : Pat<(i16 (anyext (loadi8 addr:$src))), (MOVZX16rm8 addr:$src)>;
-def : Pat<(i32 (anyext (loadi8 addr:$src))), (MOVZX32rm8 addr:$src)>;
-def : Pat<(i32 (anyext (loadi16 addr:$src))), (MOVZX32rm16 addr:$src)>;
+// anyext
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
+ Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
+ Requires<[In32BitMode]>;
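+// anyext from i16 needs no zero-extension: the high bits are undefined,
+// so just place the value in the low half of an undef i32.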
+def : Pat<(i32 (anyext GR16:$src)),
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
// (and (i32 load), 255) -> (zextload i8)
-def : Pat<(i32 (and (loadi32 addr:$src), (i32 255))), (MOVZX32rm8 addr:$src)>;
-def : Pat<(i32 (and (loadi32 addr:$src), (i32 65535))),(MOVZX32rm16 addr:$src)>;
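+// Restricted to non-volatile loads (nvload): the fold narrows the memory
+// access, which would be wrong for a volatile load.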
+def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
+ (MOVZX32rm8 addr:$src)>;
+def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
+ (MOVZX32rm16 addr:$src)>;
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
+// Odd encoding trick: -128 fits into an 8-bit immediate field while
+// +128 doesn't, so in this special case use a sub instead of an add.
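+// e.g. add with +128 needs a full imm16/imm32, while sub with $-128 fits
+// the sign-extended imm8 form and encodes shorter.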
+def : Pat<(add GR16:$src1, 128),
+ (SUB16ri8 GR16:$src1, -128)>;
+def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
+ (SUB16mi8 addr:$dst, -128)>;
+def : Pat<(add GR32:$src1, 128),
+ (SUB32ri8 GR32:$src1, -128)>;
+def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
+ (SUB32mi8 addr:$dst, -128)>;
+
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+ (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
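+// (MOV32to32_ copies into the register class that has 8-bit subregisters,
+// i.e. EAX/ECX/EDX/EBX; only those are addressable as r8 in 32-bit mode.)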
+def : Pat<(and GR32:$src1, 0xff),
+ (MOVZX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src1),
+ x86_subreg_8bit)))>,
+ Requires<[In32BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src1),
+ x86_subreg_8bit)))>,
+ Requires<[In32BitMode]>;
+
+// sext_inreg patterns
+def : Pat<(sext_inreg GR32:$src, i16),
+ (MOVSX32rr16 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)))>;
+def : Pat<(sext_inreg GR32:$src, i8),
+ (MOVSX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src),
+ x86_subreg_8bit)))>,
+ Requires<[In32BitMode]>;
+def : Pat<(sext_inreg GR16:$src, i8),
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src),
+ x86_subreg_8bit)))>,
+ Requires<[In32BitMode]>;
+
+// trunc patterns
+def : Pat<(i16 (trunc GR32:$src)),
+ (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+def : Pat<(i8 (trunc GR32:$src)),
+ (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc GR16:$src)),
+ (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), x86_subreg_8bit))>,
+ Requires<[In32BitMode]>;
+
// (shl x, 1) ==> (add x, x)
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+// (shl x, (and y, 31)) ==> (shl x, y)
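+// The hardware shifter only reads the low 5 bits of the count in CL, so
+// the explicit and is a no-op and can be dropped.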
+def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
+ (SHL8rCL GR8:$src1)>;
+def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
+ (SHL16rCL GR16:$src1)>;
+def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
+ (SHL32rCL GR32:$src1)>;
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL8mCL addr:$dst)>;
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL16mCL addr:$dst)>;
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHL32mCL addr:$dst)>;
+
+def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
+ (SHR8rCL GR8:$src1)>;
+def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
+ (SHR16rCL GR16:$src1)>;
+def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
+ (SHR32rCL GR32:$src1)>;
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR8mCL addr:$dst)>;
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR16mCL addr:$dst)>;
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SHR32mCL addr:$dst)>;
+
+def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
+ (SAR8rCL GR8:$src1)>;
+def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
+ (SAR16rCL GR16:$src1)>;
+def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
+ (SAR32rCL GR32:$src1)>;
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR8mCL addr:$dst)>;
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR16mCL addr:$dst)>;
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+ (SAR32mCL addr:$dst)>;
+
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
def : Pat<(or (srl GR32:$src1, CL:$amt),
              (shl GR32:$src2, (sub 32, CL:$amt))),
          (SHRD32rrCL GR32:$src1, GR32:$src2)>;

def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt),
                     (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
          (SHRD32mrCL addr:$dst, GR32:$src2)>;
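+// The same fold when the amount is computed as i32 in ECX and truncated
+// to i8; the count is still read from CL.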
+def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHRD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHRD32mrCL addr:$dst, GR32:$src2)>;
+
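+// The shrd fragment carries both shift amounts; its predicate should
+// guarantee $amt2 == 32 - $amt1, so only $amt1 needs to be encoded.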
+def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
def : Pat<(or (shl GR32:$src1, CL:$amt),
              (srl GR32:$src2, (sub 32, CL:$amt))),
          (SHLD32rrCL GR32:$src1, GR32:$src2)>;

def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt),
                     (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst),
          (SHLD32mrCL addr:$dst, GR32:$src2)>;
+def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ (SHLD32rrCL GR32:$src1, GR32:$src2)>;
+
+def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
+ (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))),
+ addr:$dst),
+ (SHLD32mrCL addr:$dst, GR32:$src2)>;
+
+def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+ (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
+ GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
+
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
def : Pat<(or (srl GR16:$src1, CL:$amt),
              (shl GR16:$src2, (sub 16, CL:$amt))),
          (SHRD16rrCL GR16:$src1, GR16:$src2)>;

def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt),
                     (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
          (SHRD16mrCL addr:$dst, GR16:$src2)>;
+def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHRD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHRD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
def : Pat<(or (shl GR16:$src1, CL:$amt),
              (srl GR16:$src2, (sub 16, CL:$amt))),
          (SHLD16rrCL GR16:$src1, GR16:$src2)>;

def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
                     (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst),
          (SHLD16mrCL addr:$dst, GR16:$src2)>;
+def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ (SHLD16rrCL GR16:$src1, GR16:$src2)>;
+
+def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
+ (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))),
+ addr:$dst),
+ (SHLD16mrCL addr:$dst, GR16:$src2)>;
+
+def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+ (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
+
+def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
+ GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+
+//===----------------------------------------------------------------------===//
+// Overflow Patterns
+//===----------------------------------------------------------------------===//
+
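+// Each pattern pairs the arithmetic node with (implicit EFLAGS), so the
+// overflow/carry result stays visible to a following SETcc or branch.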
+// Register-Register Addition with Overflow
+def : Pat<(parallel (X86add_ovf GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (ADD8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86add_ovf GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86add_ovf GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Addition with Overflow
+def : Pat<(parallel (X86add_ovf GR8:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86add_ovf GR16:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86add_ovf GR32:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (ADD32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Addition with Overflow
+def : Pat<(parallel (X86add_ovf GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_ovf GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_ovf GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86add_ovf GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86add_ovf GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Addition with Overflow
+def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86add_ovf (load addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mr addr:$dst, GR32:$src2)>;
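+
+// Memory-Integer Addition with Overflow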
+def : Pat<(parallel (store (X86add_ovf (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_ovf (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_ovf (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86add_ovf (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (ADD32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register Subtraction with Overflow
+def : Pat<(parallel (X86sub_ovf GR8:$src1, GR8:$src2),
+ (implicit EFLAGS)),
+ (SUB8rr GR8:$src1, GR8:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (SUB32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Subtraction with Overflow
+def : Pat<(parallel (X86sub_ovf GR8:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB8rm GR8:$src1, addr:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR16:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR32:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (SUB32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Subtraction with Overflow
+def : Pat<(parallel (X86sub_ovf GR8:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (SUB32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86sub_ovf GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Register Subtraction with Overflow
+def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB8mr addr:$dst, GR8:$src2)>;
+def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR16:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mr addr:$dst, GR16:$src2)>;
+def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), GR32:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mr addr:$dst, GR32:$src2)>;
+
+// Memory-Integer Subtraction with Overflow
+def : Pat<(parallel (store (X86sub_ovf (loadi8 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB8mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_ovf (loadi16 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_ovf (loadi32 addr:$dst), imm:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mi addr:$dst, imm:$src2)>;
+def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i16immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB16mi8 addr:$dst, i16immSExt8:$src2)>;
+def : Pat<(parallel (store (X86sub_ovf (load addr:$dst), i32immSExt8:$src2),
+ addr:$dst),
+ (implicit EFLAGS)),
+ (SUB32mi8 addr:$dst, i32immSExt8:$src2)>;
+
+// Register-Register Signed Integer Multiply with Overflow
+def : Pat<(parallel (X86smul_ovf GR16:$src1, GR16:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (X86smul_ovf GR32:$src1, GR32:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rr GR32:$src1, GR32:$src2)>;
+
+// Register-Memory Signed Integer Multiply with Overflow
+def : Pat<(parallel (X86smul_ovf GR16:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL16rm GR16:$src1, addr:$src2)>;
+def : Pat<(parallel (X86smul_ovf GR32:$src1, (load addr:$src2)),
+ (implicit EFLAGS)),
+ (IMUL32rm GR32:$src1, addr:$src2)>;
+
+// Register-Integer Signed Integer Multiply with Overflow
+def : Pat<(parallel (X86smul_ovf GR16:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_ovf GR32:$src1, imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_ovf GR16:$src1, i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_ovf GR32:$src1, i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
+
+// Memory-Integer Signed Integer Multiply with Overflow
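+// (The two generic-imm patterns below are identical on the source side;
+// they are disambiguated by the result type of the selected instruction,
+// i16 for IMUL16rmi vs. i32 for IMUL32rmi.)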
+def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rmi addr:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_ovf (load addr:$src1), imm:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rmi addr:$src1, imm:$src2)>;
+def : Pat<(parallel (X86smul_ovf (load addr:$src1), i16immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (X86smul_ovf (load addr:$src1), i32immSExt8:$src2),
+ (implicit EFLAGS)),
+ (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
+
+// Optimize multiply-with-overflow by 2 into an add.
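+// (add x, x) sets OF exactly as a signed multiply by 2 would, and the
+// AddedComplexity makes these patterns beat the generic IMUL ones.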
+let AddedComplexity = 2 in {
+def : Pat<(parallel (X86smul_ovf GR16:$src1, 2),
+ (implicit EFLAGS)),
+ (ADD16rr GR16:$src1, GR16:$src1)>;
+
+def : Pat<(parallel (X86smul_ovf GR32:$src1, 2),
+ (implicit EFLAGS)),
+ (ADD32rr GR32:$src1, GR32:$src1)>;
+}
+
//===----------------------------------------------------------------------===//
// Floating Point Stack Support
//===----------------------------------------------------------------------===//