X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FR600%2FAMDGPUInstructions.td;h=cf3bffac968c907feed8d4840b1709e3c45a09b5;hb=f50f927d65a97737d3e31229f9006c989bbc1fc4;hp=6745fed3baecdc2769ce56a3520ce937f8d9e601;hpb=79916948e1fd176a3898b596b679cc9dba3d40a8;p=oota-llvm.git diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 6745fed3bae..cf3bffac968 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -34,58 +34,149 @@ class AMDGPUShaderInst pattern> } +def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">; +def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">; +def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; + def InstFlag : OperandWithDefaultOps ; +def ADDRIndirect : ComplexPattern; -def COND_EQ : PatLeaf < +let OperandType = "OPERAND_IMMEDIATE" in { + +def u32imm : Operand { + let PrintMethod = "printU32ImmOperand"; +} + +def u16imm : Operand { + let PrintMethod = "printU16ImmOperand"; +} + +def u8imm : Operand { + let PrintMethod = "printU8ImmOperand"; +} + +} // End OperandType = "OPERAND_IMMEDIATE" + +//===--------------------------------------------------------------------===// +// Custom Operands +//===--------------------------------------------------------------------===// +def brtarget : Operand; + +//===----------------------------------------------------------------------===// +// PatLeafs for floating-point comparisons +//===----------------------------------------------------------------------===// + +def COND_OEQ : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOEQ: case ISD::SETUEQ: - case ISD::SETEQ: return true;}}}] + [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}] >; -def COND_NE : PatLeaf < +def COND_OGT : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETONE: case ISD::SETUNE: - case ISD::SETNE: return true;}}}] + [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}] >; -def COND_GT : PatLeaf < + +def COND_OGE : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOGT: case ISD::SETUGT: - case ISD::SETGT: return true;}}}] + [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}] >; -def COND_GE : PatLeaf < +def COND_OLT : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOGE: case ISD::SETUGE: - case ISD::SETGE: return true;}}}] + [{return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;}] >; -def COND_LT : PatLeaf < +def COND_OLE : PatLeaf < + (cond), + [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}] +>; + +def COND_UNE : PatLeaf < + (cond), + [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}] +>; + +def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>; +def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>; + +//===----------------------------------------------------------------------===// +// PatLeafs for unsigned comparisons +//===----------------------------------------------------------------------===// + +def COND_UGT : PatLeaf <(cond), [{return N->get() == ISD::SETUGT;}]>; +def COND_UGE : PatLeaf <(cond), [{return N->get() == ISD::SETUGE;}]>; +def COND_ULT : PatLeaf <(cond), [{return N->get() == ISD::SETULT;}]>; +def COND_ULE : PatLeaf <(cond), [{return N->get() == ISD::SETULE;}]>; + +//===----------------------------------------------------------------------===// +// PatLeafs for signed comparisons +//===----------------------------------------------------------------------===// + +def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>; +def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>; +def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>; +def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>; + +//===----------------------------------------------------------------------===// +// PatLeafs for integer equality +//===----------------------------------------------------------------------===// + +def COND_EQ : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOLT: case ISD::SETULT: - case ISD::SETLT: return true;}}}] + [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}] >; -def COND_LE : PatLeaf < +def COND_NE : PatLeaf < (cond), - [{switch(N->get()){{default: return false; - case ISD::SETOLE: case ISD::SETULE: - case ISD::SETLE: return true;}}}] + [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}] >; def COND_NULL : PatLeaf < (cond), - [{return false;}] + [{(void)N; return false;}] >; //===----------------------------------------------------------------------===// // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// +class PrivateMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; +}]>; + +class PrivateLoad : PrivateMemOp < + (ops node:$ptr), (op node:$ptr) +>; + +class PrivateStore : PrivateMemOp < + (ops node:$value, node:$ptr), (op node:$value, node:$ptr) +>; + +def extloadi8_private : PrivateLoad ; +def sextloadi8_private : PrivateLoad ; +def extloadi16_private : PrivateLoad ; +def sextloadi16_private : PrivateLoad ; +def load_private : PrivateLoad ; + +def truncstorei8_private : PrivateStore ; +def truncstorei16_private : PrivateStore ; +def store_private : PrivateStore ; + +def global_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast(N)); +}]>; + +// Global address space loads +def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isGlobalLoad(dyn_cast(N)); +}]>; + +// Constant address space loads +def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isConstantLoad(dyn_cast(N), -1); +}]>; + def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ LoadSDNode *L = cast(N); return L->getExtensionType() == ISD::ZEXTLOAD || @@ -191,21 +282,74 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isLocalLoad(dyn_cast(N)); }]>; -def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value), - (atomic_load_add node:$ptr, node:$value), [{ - return dyn_cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +class Aligned8Bytes : PatFrag (N)->getAlignment() % 8 == 0; }]>; +def local_load_aligned8bytes : Aligned8Bytes < + (ops node:$ptr), (local_load node:$ptr) +>; + +def local_store_aligned8bytes : Aligned8Bytes < + (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr) +>; + +class local_binary_atomic_op : + PatFrag<(ops node:$ptr, node:$value), + (atomic_op node:$ptr, node:$value), [{ + return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + + +def atomic_swap_local : local_binary_atomic_op; +def atomic_load_add_local : local_binary_atomic_op; +def atomic_load_sub_local : local_binary_atomic_op; +def atomic_load_and_local : local_binary_atomic_op; +def atomic_load_or_local : local_binary_atomic_op; +def atomic_load_xor_local : local_binary_atomic_op; +def atomic_load_nand_local : local_binary_atomic_op; +def atomic_load_min_local : local_binary_atomic_op; +def atomic_load_max_local : local_binary_atomic_op; +def atomic_load_umin_local : local_binary_atomic_op; +def atomic_load_umax_local : local_binary_atomic_op; + def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return dyn_cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }]>; +def atomic_cmp_swap_32_local : + PatFrag<(ops node:$ptr, node:$cmp, node:$swap), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{ + AtomicSDNode *AN = cast(N); + return AN->getMemoryVT() == MVT::i32 && + AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + +def atomic_cmp_swap_64_local : + PatFrag<(ops node:$ptr, node:$cmp, node:$swap), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{ + AtomicSDNode *AN = cast(N); + return AN->getMemoryVT() == MVT::i64 && + AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + +//===----------------------------------------------------------------------===// +// Misc Pattern Fragments +//===----------------------------------------------------------------------===// + +def fmad : PatFrag < + (ops node:$src0, node:$src1, node:$src2), + (fadd (fmul node:$src0, node:$src1), node:$src2) +>; + class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; -int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding +int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding +int FP32_NEG_ONE = 0xbf800000; +int FP32_ONE = 0x3f800000; } def CONST : Constants; @@ -219,9 +363,6 @@ def FP_ONE : PatLeaf < [{return N->isExactlyValue(1.0);}] >; -def U24 : ComplexPattern; -def I24 : ComplexPattern; - let isCodeGenOnly = 1, isPseudo = 1 in { let usesCustomInserter = 1 in { @@ -230,7 +371,7 @@ class CLAMP : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "CLAMP $dst, $src0", - [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] + [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] >; class FABS : AMDGPUShaderInst < @@ -251,6 +392,8 @@ class FNEG : AMDGPUShaderInst < multiclass RegisterLoadStore { +let UseNamedOperandTable = 1 in { + def RegisterLoad : AMDGPUShaderInst < (outs dstClass:$dst), (ins addrClass:$addr, i32imm:$chan), @@ -269,6 +412,7 @@ multiclass RegisterLoadStore /* --------------------- */ /* Extract element pattern */ -class Extract_Element : Pat< (sub_type (vector_extract vec_type:$src, sub_idx)), @@ -300,12 +444,6 @@ class Insert_Element ; -class Vector4_Build : Pat < - (vecType (build_vector elemType:$x, elemType:$y, elemType:$z, elemType:$w)), - (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG - (vecType (IMPLICIT_DEF)), $x, sub0), $y, sub1), $z, sub2), $w, sub3) ->; - // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer // can handle COPY instructions. // bitconvert pattern @@ -323,7 +461,7 @@ class DwordAddrPat : Pat < // BFI_INT patterns -multiclass BFIPatterns { +multiclass BFIPatterns { // Definition from ISA doc: // (y & x) | (z & ~x) @@ -339,6 +477,19 @@ multiclass BFIPatterns { (BFI_INT $x, $y, $z) >; + def : Pat < + (fcopysign f32:$src0, f32:$src1), + (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1) + >; + + def : Pat < + (f64 (fcopysign f64:$src0, f64:$src1)), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (i32 (EXTRACT_SUBREG $src0, sub0)), sub0), + (BFI_INT (LoadImm32 0x7fffffff), + (i32 (EXTRACT_SUBREG $src0, sub1)), + (i32 (EXTRACT_SUBREG $src1, sub1))), sub1) + >; } // SHA-256 Ma patterns @@ -351,6 +502,11 @@ class SHA256MaPattern : Pat < // Bitfield extract patterns +/* + +XXX: The BFE pattern is not working correctly because the XForm is not being +applied. + def legalshift32 : ImmLeaf =0 && Imm < 32;}]>; def bfemask : PatLeaf <(imm), [{return isMask_32(N->getZExtValue());}], SDNodeXFormgetTargetConstant(CountTrailingOnes_32(N->getZExtValue()), MVT::i32);}]>>; @@ -360,6 +516,8 @@ class BFEPattern : Pat < (BFE $x, $y, $z) >; +*/ + // rotr pattern class ROTRPattern : Pat < (rotr i32:$src0, i32:$src1), @@ -376,7 +534,61 @@ class UMUL24Pattern : Pat < >; */ +class IMad24Pat : Pat < + (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +class UMad24Pat : Pat < + (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +multiclass Expand24IBitOps { + def _expand_imad24 : Pat < + (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_imul24 : Pat < + (AMDGPUmul_i24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} + +multiclass Expand24UBitOps { + def _expand_umad24 : Pat < + (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_umul24 : Pat < + (AMDGPUmul_u24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} + +class RcpPat : Pat < + (fdiv FP_ONE, vt:$src), + (RcpInst $src) +>; + +multiclass RsqPat { + def : Pat < + (fdiv FP_ONE, (fsqrt vt:$src)), + (RsqInst $src) + >; + + def : Pat < + (AMDGPUrcp (fsqrt vt:$src)), + (RsqInst $src) + >; +} + include "R600Instructions.td" +include "R700Instructions.td" +include "EvergreenInstructions.td" +include "CaymanInstructions.td" include "SIInstrInfo.td"