X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;ds=sidebyside;f=lib%2FTarget%2FX86%2FX86InstrCompiler.td;h=896994629824ab753d922a61af2dba22d621c813;hb=8971717313ad72b06940bcc4198ae00c2ecaa385;hp=87634dde0b90358fe6255cb246e61240b96f25dc;hpb=ed57984483b9268c30c71031fca07e71b985f169;p=oota-llvm.git diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 87634dde0b9..89699462982 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -129,12 +129,13 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), // The MSVC runtime contains an _ftol2 routine for converting floating-point // to integer values. It has a strange calling convention: the input is -// popped from the x87 stack, and the return value is given in EDX:EAX. No -// other registers (aside from flags) are touched. +// popped from the x87 stack, and the return value is given in EDX:EAX. ECX is +// used as a temporary register. No other registers (aside from flags) are +// touched. // Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80 // variant is unnecessary. -let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in { +let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in { def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src), "# win32 fptoui", [(X86WinFTOL RFP32:$src)]>, @@ -149,11 +150,12 @@ let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in { //===----------------------------------------------------------------------===// // EH Pseudo Instructions // +let SchedRW = [WriteSystem] in { let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)], IIC_RET>; + [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -161,10 +163,38 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)], IIC_RET>; + [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } +let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), + "#EH_SJLJ_SETJMP32", + [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + def EH_SjLj_SetJmp64 : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf), + "#EH_SJLJ_SETJMP64", + [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in { + def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf), + "#EH_SJLJ_LONGJMP32", + [(X86eh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; + def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf), + "#EH_SJLJ_LONGJMP64", + [(X86eh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; + } +} +} // SchedRW + +let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { + def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} + //===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. // @@ -187,68 +217,52 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), // Alias Instructions //===----------------------------------------------------------------------===// -// Alias instructions that map movr0 to xor. +// Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. // FIXME: Set encoding to pseudo. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isCodeGenOnly = 1 in { -def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; - -// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller -// encoding and avoids a partial-register update sometimes, but doing so -// at isel time interferes with rematerialization in the current register -// allocator. For now, this is rewritten when the instruction is lowered -// to an MCInst. -def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), - "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; - -// FIXME: Set encoding to pseudo. + isCodeGenOnly = 1 in def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; + +// Other widths can also make use of the 32-bit xor, which may have a smaller +// encoding and avoid partial register updates. +def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>; +def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>; +def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> { + let AddedComplexity = 20; } -// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a -// smaller encoding, but doing so at isel time interferes with rematerialization -// in the current register allocator. For now, this is rewritten when the -// instruction is lowered to an MCInst. -// FIXME: AddedComplexity gives this a higher priority than MOV64ri32. Remove -// when we have a better way to specify isel priority. -let Defs = [EFLAGS], isCodeGenOnly=1, - AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; - // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, - isCodeGenOnly = 1 in -def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), - "", [(set GR64:$dst, i64immZExt32:$src)], - IIC_ALU_NONMEM>; + isCodeGenOnly = 1, neverHasSideEffects = 1 in +def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src), + "", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>; + +// This 64-bit pseudo-move can be used for both a 64-bit constant that is +// actually the zero-extension of a 32-bit constant, and for labels in the +// x86-64 small code model. +def mov64imm32 : ComplexPattern; + +let AddedComplexity = 1 in +def : Pat<(i64 mov64imm32:$src), + (SUBREG_TO_REG (i64 0), (MOV32ri64 mov64imm32:$src), sub_32bit)>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. -// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces -// X86CodeEmitter. -def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "", - [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>; -def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>, - OpSize; -def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>; -def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))], - IIC_ALU_NONMEM>; +def SETB_C8r : I<0, Pseudo, (outs GR8:$dst), (ins), "", + [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C16r : I<0, Pseudo, (outs GR16:$dst), (ins), "", + [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; +def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", + [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>; } // isCodeGenOnly @@ -300,6 +314,7 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), //===----------------------------------------------------------------------===// // String Pseudo Instructions // +let SchedRW = [WriteMicrocoded] in { let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, @@ -362,6 +377,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in { [(X86rep_stos i64)], IIC_REP_STOS>, REP, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // Thread Local Storage Instructions @@ -375,11 +391,16 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [ESP] in + Uses = [ESP] in { def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; +def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLS_base_addr32", + [(X86tlsbaseaddr tls32baseaddr:$sym)]>, + Requires<[In32BitMode]>; +} // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately @@ -389,11 +410,16 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], - Uses = [RSP] in + Uses = [RSP] in { def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), "# TLS_addr64", [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; +def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLS_base_addr64", + [(X86tlsbaseaddr tls64baseaddr:$sym)]>, + Requires<[In64BitMode]>; +} // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the @@ -443,6 +469,11 @@ def CMOV_GR16 : I<0, Pseudo, "#CMOV_GR16* PSEUDO!", [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>; +} // Predicates = [NoCMov] + +// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no +// SSE1. +let Predicates = [FPStackf32] in def CMOV_RFP32 : I<0, Pseudo, (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), @@ -450,6 +481,9 @@ def CMOV_RFP32 : I<0, Pseudo, [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, EFLAGS))]>; +// fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no +// SSE2. +let Predicates = [FPStackf64] in def CMOV_RFP64 : I<0, Pseudo, (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), @@ -464,7 +498,6 @@ def CMOV_RFP80 : I<0, Pseudo, [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, EFLAGS))]>; -} // Predicates = [NoCMov] } // UsesCustomInserter = 1, Uses = [EFLAGS] @@ -472,130 +505,79 @@ def CMOV_RFP80 : I<0, Pseudo, // Atomic Instruction Pseudo Instructions //===----------------------------------------------------------------------===// -// Atomic exchange, and, or, xor -let Constraints = "$val = $dst", Defs = [EFLAGS], - usesCustomInserter = 1 in { - -def ATOMAND8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMAND8 PSEUDO!", - [(set GR8:$dst, (atomic_load_and_8 addr:$ptr, GR8:$val))]>; -def ATOMOR8 : I<0, Pseudo, (outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_or_8 addr:$ptr, GR8:$val))]>; -def ATOMXOR8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMXOR8 PSEUDO!", - [(set GR8:$dst, (atomic_load_xor_8 addr:$ptr, GR8:$val))]>; -def ATOMNAND8 : I<0, Pseudo,(outs GR8:$dst),(ins i8mem:$ptr, GR8:$val), - "#ATOMNAND8 PSEUDO!", - [(set GR8:$dst, (atomic_load_nand_8 addr:$ptr, GR8:$val))]>; - -def ATOMAND16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_and_16 addr:$ptr, GR16:$val))]>; -def ATOMOR16 : I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_or_16 addr:$ptr, GR16:$val))]>; -def ATOMXOR16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMXOR16 PSEUDO!", - [(set GR16:$dst, (atomic_load_xor_16 addr:$ptr, GR16:$val))]>; -def ATOMNAND16 : I<0, Pseudo,(outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMNAND16 PSEUDO!", - [(set GR16:$dst, (atomic_load_nand_16 addr:$ptr, GR16:$val))]>; -def ATOMMIN16: I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$ptr, GR16:$val), - "#ATOMMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_min_16 addr:$ptr, GR16:$val))]>; -def ATOMMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_max_16 addr:$ptr, GR16:$val))]>; -def ATOMUMIN16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMIN16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umin_16 addr:$ptr, GR16:$val))]>; -def ATOMUMAX16: I<0, Pseudo, (outs GR16:$dst),(ins i16mem:$ptr, GR16:$val), - "#ATOMUMAX16 PSEUDO!", - [(set GR16:$dst, (atomic_load_umax_16 addr:$ptr, GR16:$val))]>; - - -def ATOMAND32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_and_32 addr:$ptr, GR32:$val))]>; -def ATOMOR32 : I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_or_32 addr:$ptr, GR32:$val))]>; -def ATOMXOR32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMXOR32 PSEUDO!", - [(set GR32:$dst, (atomic_load_xor_32 addr:$ptr, GR32:$val))]>; -def ATOMNAND32 : I<0, Pseudo,(outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMNAND32 PSEUDO!", - [(set GR32:$dst, (atomic_load_nand_32 addr:$ptr, GR32:$val))]>; -def ATOMMIN32: I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$ptr, GR32:$val), - "#ATOMMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_min_32 addr:$ptr, GR32:$val))]>; -def ATOMMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_max_32 addr:$ptr, GR32:$val))]>; -def ATOMUMIN32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMIN32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umin_32 addr:$ptr, GR32:$val))]>; -def ATOMUMAX32: I<0, Pseudo, (outs GR32:$dst),(ins i32mem:$ptr, GR32:$val), - "#ATOMUMAX32 PSEUDO!", - [(set GR32:$dst, (atomic_load_umax_32 addr:$ptr, GR32:$val))]>; - - - -def ATOMAND64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_and_64 addr:$ptr, GR64:$val))]>; -def ATOMOR64 : I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_or_64 addr:$ptr, GR64:$val))]>; -def ATOMXOR64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMXOR64 PSEUDO!", - [(set GR64:$dst, (atomic_load_xor_64 addr:$ptr, GR64:$val))]>; -def ATOMNAND64 : I<0, Pseudo,(outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMNAND64 PSEUDO!", - [(set GR64:$dst, (atomic_load_nand_64 addr:$ptr, GR64:$val))]>; -def ATOMMIN64: I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$ptr, GR64:$val), - "#ATOMMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_min_64 addr:$ptr, GR64:$val))]>; -def ATOMMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_max_64 addr:$ptr, GR64:$val))]>; -def ATOMUMIN64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMIN64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umin_64 addr:$ptr, GR64:$val))]>; -def ATOMUMAX64: I<0, Pseudo, (outs GR64:$dst),(ins i64mem:$ptr, GR64:$val), - "#ATOMUMAX64 PSEUDO!", - [(set GR64:$dst, (atomic_load_umax_64 addr:$ptr, GR64:$val))]>; +// Pseudo atomic instructions + +multiclass PSEUDO_ATOMIC_LOAD_BINOP { + let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { + let Defs = [EFLAGS, AL] in + def NAME#8 : I<0, Pseudo, (outs GR8:$dst), + (ins i8mem:$ptr, GR8:$val), + !strconcat(mnemonic, "8 PSEUDO!"), []>; + let Defs = [EFLAGS, AX] in + def NAME#16 : I<0, Pseudo,(outs GR16:$dst), + (ins i16mem:$ptr, GR16:$val), + !strconcat(mnemonic, "16 PSEUDO!"), []>; + let Defs = [EFLAGS, EAX] in + def NAME#32 : I<0, Pseudo, (outs GR32:$dst), + (ins i32mem:$ptr, GR32:$val), + !strconcat(mnemonic, "32 PSEUDO!"), []>; + let Defs = [EFLAGS, RAX] in + def NAME#64 : I<0, Pseudo, (outs GR64:$dst), + (ins i64mem:$ptr, GR64:$val), + !strconcat(mnemonic, "64 PSEUDO!"), []>; + } } -let Constraints = "$val1 = $dst1, $val2 = $dst2", - Defs = [EFLAGS, EAX, EBX, ECX, EDX], - Uses = [EAX, EBX, ECX, EDX], - mayLoad = 1, mayStore = 1, - usesCustomInserter = 1 in { -def ATOMAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMAND6432 PSEUDO!", []>; -def ATOMOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMOR6432 PSEUDO!", []>; -def ATOMXOR6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMXOR6432 PSEUDO!", []>; -def ATOMNAND6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMNAND6432 PSEUDO!", []>; -def ATOMADD6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMADD6432 PSEUDO!", []>; -def ATOMSUB6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSUB6432 PSEUDO!", []>; -def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - "#ATOMSWAP6432 PSEUDO!", []>; +multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS { + def : Pat<(!cast(frag # "_8") addr:$ptr, GR8:$val), + (!cast(name # "8") addr:$ptr, GR8:$val)>; + def : Pat<(!cast(frag # "_16") addr:$ptr, GR16:$val), + (!cast(name # "16") addr:$ptr, GR16:$val)>; + def : Pat<(!cast(frag # "_32") addr:$ptr, GR32:$val), + (!cast(name # "32") addr:$ptr, GR32:$val)>; + def : Pat<(!cast(frag # "_64") addr:$ptr, GR64:$val), + (!cast(name # "64") addr:$ptr, GR64:$val)>; } +// Atomic exchange, and, or, xor +defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">; +defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">; +defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">; +defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">; +defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">; +defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">; +defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">; +defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">; + +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; +defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; + +multiclass PSEUDO_ATOMIC_LOAD_BINOP6432 { + let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX], + mayLoad = 1, mayStore = 1, hasSideEffects = 0 in + def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), + (ins i64mem:$ptr, GR32:$val1, GR32:$val2), + !strconcat(mnemonic, "6432 PSEUDO!"), []>; +} + +defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">; +defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">; +defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">; +defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">; +defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">; +defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">; +defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">; +defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">; +defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">; +defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">; +defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; + //===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions //===----------------------------------------------------------------------===// @@ -607,14 +589,14 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), // TODO: Get this to fold the constant into the instruction. let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), - "lock\n\t" "or{l}\t{$zero, $dst|$dst, $zero}", - [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; + [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK, + Sched<[WriteALULd, WriteRMW]>; let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", - [(X86MemBarrier)]>; + [(X86MemBarrier)]>, Sched<[WriteLoad]>; // RegOpc corresponds to the mr version of the instruction // ImmOpc corresponds to the mi version of the instruction @@ -622,79 +604,80 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins), // ImmMod corresponds to the instruction format of the mi and mi8 versions multiclass LOCK_ArithBinOp RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8, Format ImmMod, string mnemonic> { -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { - -def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, - MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { + +def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, + MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), + !strconcat(mnemonic, "{b}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_NONMEM>, LOCK; +def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, + MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_NONMEM>, OpSize, LOCK; +def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, + RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, + MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), + !strconcat(mnemonic, "{l}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; -def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_NONMEM>, OpSize, LOCK; -def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, +def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_NONMEM>, LOCK; -def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, - RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 }, - MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_NONMEM>, LOCK; - -def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, - ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{b}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; - -def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; -def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; - -def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, - ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, - ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; - -def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, - ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{w}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; -def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, +def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 }, + ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2), + !strconcat(mnemonic, "{b}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, OpSize, LOCK; + +def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2), + !strconcat(mnemonic, "{l}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4}, + ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 }, + ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2), + !strconcat(mnemonic, "{q}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; + +def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, + ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, + ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2), + !strconcat(mnemonic, "{w}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, OpSize, LOCK; +def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, + ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, + ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), + !strconcat(mnemonic, "{l}\t", + "{$src2, $dst|$dst, $src2}"), + [], IIC_ALU_MEM>, LOCK; +def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{l}\t", + ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), + !strconcat(mnemonic, "{q}\t", "{$src2, $dst|$dst, $src2}"), [], IIC_ALU_MEM>, LOCK; -def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4}, - ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 }, - ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2), - !strconcat("lock\n\t", mnemonic, "{q}\t", - "{$src2, $dst|$dst, $src2}"), - [], IIC_ALU_MEM>, LOCK; } @@ -707,107 +690,120 @@ defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, "and">; defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; // Optimized codegen when the non-memory output is not used. -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { - -def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), - "lock\n\t" - "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), - "lock\n\t" - "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; -def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), - "lock\n\t" - "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), - "lock\n\t" - "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; - -def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), - "lock\n\t" - "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), - "lock\n\t" - "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK; -def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), - "lock\n\t" - "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK; -def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), - "lock\n\t" - "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK; +multiclass LOCK_ArithUnOp Opc8, bits<8> Opc, Format Form, + string mnemonic> { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { + +def NAME#8m : I, LOCK; +def NAME#16m : I, OpSize, LOCK; +def NAME#32m : I, LOCK; +def NAME#64m : RI, LOCK; +} } -// Atomic compare and swap. -let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], - isCodeGenOnly = 1 in -def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr), - "lock\n\t" - "cmpxchg8b\t$ptr", - [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK; +defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "inc">; +defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "dec">; -let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], - isCodeGenOnly = 1 in -def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr), - "lock\n\t" - "cmpxchg16b\t$ptr", - [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK, - Requires<[HasCmpxchg16b]>; - -let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in { -def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap), - "lock\n\t" - "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK; +// Atomic compare and swap. +multiclass LCMPXCHG_UnOp Opc, Format Form, string mnemonic, + SDPatternOperator frag, X86MemOperand x86memop, + InstrItinClass itin> { +let isCodeGenOnly = 1 in { + def NAME : I, TB, LOCK; +} } -let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in { -def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap), - "lock\n\t" - "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK; +multiclass LCMPXCHG_BinOp Opc8, bits<8> Opc, Format Form, + string mnemonic, SDPatternOperator frag, + InstrItinClass itin8, InstrItinClass itin> { +let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in { + let Defs = [AL, EFLAGS], Uses = [AL] in + def NAME#8 : I, TB, LOCK; + let Defs = [AX, EFLAGS], Uses = [AX] in + def NAME#16 : I, TB, OpSize, LOCK; + let Defs = [EAX, EFLAGS], Uses = [EAX] in + def NAME#32 : I, TB, LOCK; + let Defs = [RAX, EFLAGS], Uses = [RAX] in + def NAME#64 : RI, TB, LOCK; +} } -let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in { -def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap), - "lock\n\t" - "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK; +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], + SchedRW = [WriteALULd, WriteRMW] in { +defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", + X86cas8, i64mem, + IIC_CMPX_LOCK_8B>; } -let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in { -def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap), - "lock\n\t" - "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}", - [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK; +let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], + Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in { +defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", + X86cas16, i128mem, + IIC_CMPX_LOCK_16B>, REX_W; } +defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", + X86cas, IIC_CMPX_LOCK_8, IIC_CMPX_LOCK>; + // Atomic exchange and add -let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { -def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), - "lock\n\t" - "xadd{b}\t{$val, $ptr|$ptr, $val}", - [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))], - IIC_XADD_LOCK_MEM8>, - TB, LOCK; -def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr), - "lock\n\t" - "xadd{w}\t{$val, $ptr|$ptr, $val}", - [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))], - IIC_XADD_LOCK_MEM>, - TB, OpSize, LOCK; -def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr), - "lock\n\t" - "xadd{l}\t{$val, $ptr|$ptr, $val}", - [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))], - IIC_XADD_LOCK_MEM>, - TB, LOCK; -def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr), - "lock\n\t" - "xadd{q}\t{$val, $ptr|$ptr, $val}", - [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))], - IIC_XADD_LOCK_MEM>, - TB, LOCK; +multiclass ATOMIC_LOAD_BINOP opc8, bits<8> opc, string mnemonic, + string frag, + InstrItinClass itin8, InstrItinClass itin> { + let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { + def NAME#8 : I(frag # "_8") addr:$ptr, GR8:$val))], + itin8>; + def NAME#16 : I(frag # "_16") addr:$ptr, GR16:$val))], + itin>, OpSize; + def NAME#32 : I(frag # "_32") addr:$ptr, GR32:$val))], + itin>; + def NAME#64 : RI(frag # "_64") addr:$ptr, GR64:$val))], + itin>; + } } +defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add", + IIC_XADD_LOCK_MEM8, IIC_XADD_LOCK_MEM>, + TB, LOCK; + def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), "#ACQUIRE_MOV PSEUDO!", [(set GR8:$dst, (atomic_load_8 addr:$src))]>; @@ -937,20 +933,6 @@ def : Pat<(i64 (X86Wrapper texternalsym:$dst)), def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>; -// In static codegen with small code model, we can get the address of a label -// into a register with 'movl'. FIXME: This is a hack, the 'imm' predicate of -// the MOV64ri64i32 should accept these. -def : Pat<(i64 (X86Wrapper tconstpool :$dst)), - (MOV64ri64i32 tconstpool :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tjumptable :$dst)), - (MOV64ri64i32 tjumptable :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)), - (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper texternalsym:$dst)), - (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>; -def : Pat<(i64 (X86Wrapper tblockaddress:$dst)), - (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>; - // In kernel code model, we can get the address of a label // into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of // the MOV64ri32 should accept these. @@ -995,9 +977,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement @@ -1007,9 +986,26 @@ def : Pat<(X86call (i64 tglobaladdr:$dst)), def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>; -// tailcall stuff -def : Pat<(X86tcret GR32_TC:$dst, imm:$off), - (TCRETURNri GR32_TC:$dst, imm:$off)>, +// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they +// can never use callee-saved registers. That is the purpose of the GR64_TC +// register classes. +// +// The only volatile register that is never used by the calling convention is +// %r11. This happens when calling a vararg function with 6 arguments. +// +// Match an X86tcret that uses less than 7 volatile registers. +def X86tcret_6regs : PatFrag<(ops node:$ptr, node:$off), + (X86tcret node:$ptr, node:$off), [{ + // X86tcret args: (*chain, ptr, imm, regs..., glue) + unsigned NumRegs = 0; + for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i) + if (isa(N->getOperand(i)) && ++NumRegs > 6) + return false; + return true; +}]>; + +def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), + (TCRETURNri ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In32BitMode]>; // FIXME: This is disabled for 32-bit PIC mode because the global base @@ -1031,7 +1027,9 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, Requires<[In64BitMode]>; -def : Pat<(X86tcret (load addr:$dst), imm:$off), +// Don't fold loads into X86tcret requiring more than 6 regs. +// There wouldn't be enough scratch registers for base+index. +def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, Requires<[In64BitMode]>; @@ -1067,12 +1065,14 @@ def : Pat<(X86cmp GR64:$src1, 0), // inverted. multiclass CMOVmr { - def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), - (Inst16 GR16:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), - (Inst32 GR32:$src2, addr:$src1)>; - def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), - (Inst64 GR64:$src2, addr:$src1)>; + let Predicates = [HasCMov] in { + def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, InvertedCond, EFLAGS), + (Inst16 GR16:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, InvertedCond, EFLAGS), + (Inst32 GR32:$src2, addr:$src1)>; + def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, InvertedCond, EFLAGS), + (Inst64 GR64:$src2, addr:$src1)>; + } } defm : CMOVmr; @@ -1096,7 +1096,8 @@ defm : CMOVmr; def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>; def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>; -def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; +def : Pat<(zextloadi64i1 addr:$src), + (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; // extload bool -> extload byte // When extloading from 16-bit and smaller memory locations into 64-bit @@ -1110,14 +1111,16 @@ def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>; def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>; def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; -def : Pat<(extloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>; -def : Pat<(extloadi64i8 addr:$src), (MOVZX64rm8 addr:$src)>; -def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>; // For other extloads, use subregs, since the high contents of the register are // defined after an extload. +def : Pat<(extloadi64i1 addr:$src), + (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; +def : Pat<(extloadi64i8 addr:$src), + (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>; +def : Pat<(extloadi64i16 addr:$src), + (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>; def : Pat<(extloadi64i32 addr:$src), - (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), - sub_32bit)>; + (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>; // anyext. Define these to do an explicit zero-extend to // avoid partial-register updates. @@ -1129,8 +1132,10 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>; -def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>; -def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>; +def : Pat<(i64 (anyext GR8 :$src)), + (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8 :$src), sub_32bit)>; +def : Pat<(i64 (anyext GR16:$src)), + (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>; def : Pat<(i64 (anyext GR32:$src)), (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>; @@ -1176,7 +1181,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. -let AddedComplexity = 5 in { // Try this before the selecting to OR +// Try this before the selecting to OR. +let AddedComplexity = 5, SchedRW = [WriteALU] in { let isConvertibleToThreeAddress = 1, Constraints = "$src1 = $dst", Defs = [EFLAGS] in { @@ -1223,7 +1229,7 @@ def ADD64ri32_DB : I<0, Pseudo, [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt32:$src2))]>; } -} // AddedComplexity +} // AddedComplexity, SchedRW //===----------------------------------------------------------------------===// @@ -1294,13 +1300,19 @@ def : Pat<(and GR16:$src1, 0xff), // r & (2^32-1) ==> movz def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), - (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>; + (SUBREG_TO_REG (i64 0), + (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), + sub_32bit)>; // r & (2^16-1) ==> movz def : Pat<(and GR64:$src, 0xffff), - (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>; + (SUBREG_TO_REG (i64 0), + (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), + sub_32bit)>; // r & (2^8-1) ==> movz def : Pat<(and GR64:$src, 0xff), - (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>; + (SUBREG_TO_REG (i64 0), + (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))), + sub_32bit)>; // r & (2^8-1) ==> movz def : Pat<(and GR32:$src1, 0xff), (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,