lib/Target/R600/AMDILISelLowering.cpp

   1 //===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //==-----------------------------------------------------------------------===//
   9 //
  10 /// \file
  11 /// \brief TargetLowering functions borrowed from AMDIL.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPUISelLowering.h"
  16 #include "AMDGPURegisterInfo.h"
  17 #include "AMDGPUSubtarget.h"
  18 #include "AMDILDevices.h"
  19 #include "AMDILIntrinsicInfo.h"
  20 #include "llvm/CodeGen/MachineFrameInfo.h"
  21 #include "llvm/CodeGen/MachineRegisterInfo.h"
  22 #include "llvm/CodeGen/PseudoSourceValue.h"
  23 #include "llvm/CodeGen/SelectionDAG.h"
  24 #include "llvm/CodeGen/SelectionDAGNodes.h"
  25 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  26 #include "llvm/IR/CallingConv.h"
  27 #include "llvm/IR/DerivedTypes.h"
  28 #include "llvm/IR/Instructions.h"
  29 #include "llvm/IR/Intrinsics.h"
  30 #include "llvm/Support/raw_ostream.h"
  31 #include "llvm/Target/TargetInstrInfo.h"
  32 #include "llvm/Target/TargetOptions.h"
  33
  34 using namespace llvm;
  35 //===----------------------------------------------------------------------===//
  36 // TargetLowering Implementation Help Functions End
  37 //===----------------------------------------------------------------------===//
  38
  39 //===----------------------------------------------------------------------===//
  40 // TargetLowering Class Implementation Begins
  41 //===----------------------------------------------------------------------===//
  42 void AMDGPUTargetLowering::InitAMDILLowering() {
  43   int types[] = {
  44     (int)MVT::i8,
  45     (int)MVT::i16,
  46     (int)MVT::i32,
  47     (int)MVT::f32,
  48     (int)MVT::f64,
  49     (int)MVT::i64,
  50     (int)MVT::v2i8,
  51     (int)MVT::v4i8,
  52     (int)MVT::v2i16,
  53     (int)MVT::v4i16,
  54     (int)MVT::v4f32,
  55     (int)MVT::v4i32,
  56     (int)MVT::v2f32,
  57     (int)MVT::v2i32,
  58     (int)MVT::v2f64,
  59     (int)MVT::v2i64
  60   };
  61
  62   int IntTypes[] = {
  63     (int)MVT::i8,
  64     (int)MVT::i16,
  65     (int)MVT::i32,
  66     (int)MVT::i64
  67   };
  68
  69   int FloatTypes[] = {
  70     (int)MVT::f32,
  71     (int)MVT::f64
  72   };
  73
  74   int VectorTypes[] = {
  75     (int)MVT::v2i8,
  76     (int)MVT::v4i8,
  77     (int)MVT::v2i16,
  78     (int)MVT::v4i16,
  79     (int)MVT::v4f32,
  80     (int)MVT::v4i32,
  81     (int)MVT::v2f32,
  82     (int)MVT::v2i32,
  83     (int)MVT::v2f64,
  84     (int)MVT::v2i64
  85   };
  86   size_t NumTypes = sizeof(types) / sizeof(*types);
  87   size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  88   size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  89   size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
  90
  91   const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  92   // These are the current register classes that are
  93   // supported
  94
  95   for (unsigned int x  = 0; x < NumTypes; ++x) {
  96     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
  97
  98     //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
  99     // We cannot sextinreg, expand to shifts
 100     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
 101     setOperationAction(ISD::SUBE, VT, Expand);
 102     setOperationAction(ISD::SUBC, VT, Expand);
 103     setOperationAction(ISD::ADDE, VT, Expand);
 104     setOperationAction(ISD::ADDC, VT, Expand);
 105     setOperationAction(ISD::BRCOND, VT, Custom);
 106     setOperationAction(ISD::BR_JT, VT, Expand);
 107     setOperationAction(ISD::BRIND, VT, Expand);
 108     // TODO: Implement custom UREM/SREM routines
 109     setOperationAction(ISD::SREM, VT, Expand);
 110     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 111     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 112     if (VT != MVT::i64 && VT != MVT::v2i64) {
 113       setOperationAction(ISD::SDIV, VT, Custom);
 114     }
 115   }
 116   for (unsigned int x = 0; x < NumFloatTypes; ++x) {
 117     MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
 118
 119     // IL does not have these operations for floating point types
 120     setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
 121     setOperationAction(ISD::SETOLT, VT, Expand);
 122     setOperationAction(ISD::SETOGE, VT, Expand);
 123     setOperationAction(ISD::SETOGT, VT, Expand);
 124     setOperationAction(ISD::SETOLE, VT, Expand);
 125     setOperationAction(ISD::SETULT, VT, Expand);
 126     setOperationAction(ISD::SETUGE, VT, Expand);
 127     setOperationAction(ISD::SETUGT, VT, Expand);
 128     setOperationAction(ISD::SETULE, VT, Expand);
 129   }
 130
 131   for (unsigned int x = 0; x < NumIntTypes; ++x) {
 132     MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
 133
 134     // GPU also does not have divrem function for signed or unsigned
 135     setOperationAction(ISD::SDIVREM, VT, Expand);
 136
 137     // GPU does not have [S|U]MUL_LOHI functions as a single instruction
 138     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 139     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 140
 141     // GPU doesn't have a rotl, rotr, or byteswap instruction
 142     setOperationAction(ISD::ROTR, VT, Expand);
 143     setOperationAction(ISD::BSWAP, VT, Expand);
 144
 145     // GPU doesn't have any counting operators
 146     setOperationAction(ISD::CTPOP, VT, Expand);
 147     setOperationAction(ISD::CTTZ, VT, Expand);
 148     setOperationAction(ISD::CTLZ, VT, Expand);
 149   }
 150
 151   for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
 152     MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
 153
 154     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
 155     setOperationAction(ISD::SDIVREM, VT, Expand);
 156     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 157     // setOperationAction(ISD::VSETCC, VT, Expand);
 158     setOperationAction(ISD::SELECT_CC, VT, Expand);
 159
 160   }
 161   if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
 162     setOperationAction(ISD::MULHU, MVT::i64, Expand);
 163     setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
 164     setOperationAction(ISD::MULHS, MVT::i64, Expand);
 165     setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
 166     setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 167     setOperationAction(ISD::SREM, MVT::v2i64, Expand);
 168     setOperationAction(ISD::Constant          , MVT::i64  , Legal);
 169     setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
 170     setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
 171     setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
 172     setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
 173     setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
 174   }
 175   if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
 176     // we support loading/storing v2f64 but not operations on the type
 177     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
 178     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
 179     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
 180     setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
 181     setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
 182     setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
 183     // We want to expand vector conversions into their scalar
 184     // counterparts.
 185     setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
 186     setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
 187     setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
 188     setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
 189     setOperationAction(ISD::FABS, MVT::f64, Expand);
 190     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
 191   }
 192   // TODO: Fix the UDIV24 algorithm so it works for these
 193   // types correctly. This needs vector comparisons
 194   // for this to work correctly.
 195   setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
 196   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
 197   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
 198   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
 199   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
 200   setOperationAction(ISD::SUBC, MVT::Other, Expand);
 201   setOperationAction(ISD::ADDE, MVT::Other, Expand);
 202   setOperationAction(ISD::ADDC, MVT::Other, Expand);
 203   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
 204   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 205   setOperationAction(ISD::BRIND, MVT::Other, Expand);
 206   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
 207
 208
 209   // Use the default implementation.
 210   setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
 211   setOperationAction(ISD::Constant          , MVT::i32    , Legal);
 212
 213   setSchedulingPreference(Sched::RegPressure);
 214   setPow2DivIsCheap(false);
 215   setSelectIsExpensive(true);
 216   setJumpIsExpensive(true);
 217
 218   MaxStoresPerMemcpy  = 4096;
 219   MaxStoresPerMemmove = 4096;
 220   MaxStoresPerMemset  = 4096;
 221
 222 }
 223
 224 bool
 225 AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 226     const CallInst &I, unsigned Intrinsic) const {
 227   return false;
 228 }
 229
 230 // The backend supports 32 and 64 bit floating point immediates
 231 bool
 232 AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
 233   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 234       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 235     return true;
 236   } else {
 237     return false;
 238   }
 239 }
 240
 241 bool
 242 AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
 243   if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
 244       || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
 245     return false;
 246   } else {
 247     return true;
 248   }
 249 }
 250
 251
 252 // isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
 253 // be zero. Op is expected to be a target specific node. Used by DAG
 254 // combiner.
 255
 256 void
 257 AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
 258     const SDValue Op,
 259     APInt &KnownZero,
 260     APInt &KnownOne,
 261     const SelectionDAG &DAG,
 262     unsigned Depth) const {
 263   APInt KnownZero2;
 264   APInt KnownOne2;
 265   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
 266   switch (Op.getOpcode()) {
 267     default: break;
 268     case ISD::SELECT_CC:
 269              DAG.ComputeMaskedBits(
 270                  Op.getOperand(1),
 271                  KnownZero,
 272                  KnownOne,
 273                  Depth + 1
 274                  );
 275              DAG.ComputeMaskedBits(
 276                  Op.getOperand(0),
 277                  KnownZero2,
 278                  KnownOne2
 279                  );
 280              assert((KnownZero & KnownOne) == 0
 281                  && "Bits known to be one AND zero?");
 282              assert((KnownZero2 & KnownOne2) == 0
 283                  && "Bits known to be one AND zero?");
 284              // Only known if known in both the LHS and RHS
 285              KnownOne &= KnownOne2;
 286              KnownZero &= KnownZero2;
 287              break;
 288   };
 289 }
 290
 291 //===----------------------------------------------------------------------===//
 292 //                           Other Lowering Hooks
 293 //===----------------------------------------------------------------------===//
 294
 295 SDValue
 296 AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
 297   EVT OVT = Op.getValueType();
 298   SDValue DST;
 299   if (OVT.getScalarType() == MVT::i64) {
 300     DST = LowerSDIV64(Op, DAG);
 301   } else if (OVT.getScalarType() == MVT::i32) {
 302     DST = LowerSDIV32(Op, DAG);
 303   } else if (OVT.getScalarType() == MVT::i16
 304       || OVT.getScalarType() == MVT::i8) {
 305     DST = LowerSDIV24(Op, DAG);
 306   } else {
 307     DST = SDValue(Op.getNode(), 0);
 308   }
 309   return DST;
 310 }
 311
 312 SDValue
 313 AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
 314   EVT OVT = Op.getValueType();
 315   SDValue DST;
 316   if (OVT.getScalarType() == MVT::i64) {
 317     DST = LowerSREM64(Op, DAG);
 318   } else if (OVT.getScalarType() == MVT::i32) {
 319     DST = LowerSREM32(Op, DAG);
 320   } else if (OVT.getScalarType() == MVT::i16) {
 321     DST = LowerSREM16(Op, DAG);
 322   } else if (OVT.getScalarType() == MVT::i8) {
 323     DST = LowerSREM8(Op, DAG);
 324   } else {
 325     DST = SDValue(Op.getNode(), 0);
 326   }
 327   return DST;
 328 }
 329
 330 SDValue
 331 AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
 332   SDValue Data = Op.getOperand(0);
 333   VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
 334   DebugLoc DL = Op.getDebugLoc();
 335   EVT DVT = Data.getValueType();
 336   EVT BVT = BaseType->getVT();
 337   unsigned baseBits = BVT.getScalarType().getSizeInBits();
 338   unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
 339   unsigned shiftBits = srcBits - baseBits;
 340   if (srcBits < 32) {
 341     // If the op is less than 32 bits, then it needs to extend to 32bits
 342     // so it can properly keep the upper bits valid.
 343     EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
 344     Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
 345     shiftBits = 32 - baseBits;
 346     DVT = IVT;
 347   }
 348   SDValue Shift = DAG.getConstant(shiftBits, DVT);
 349   // Shift left by 'Shift' bits.
 350   Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
 351   // Signed shift Right by 'Shift' bits.
 352   Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
 353   if (srcBits < 32) {
 354     // Once the sign extension is done, the op needs to be converted to
 355     // its original type.
 356     Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
 357   }
 358   return Data;
 359 }
 360 EVT
 361 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
 362   int iSize = (size * numEle);
 363   int vEle = (iSize >> ((size == 64) ? 6 : 5));
 364   if (!vEle) {
 365     vEle = 1;
 366   }
 367   if (size == 64) {
 368     if (vEle == 1) {
 369       return EVT(MVT::i64);
 370     } else {
 371       return EVT(MVT::getVectorVT(MVT::i64, vEle));
 372     }
 373   } else {
 374     if (vEle == 1) {
 375       return EVT(MVT::i32);
 376     } else {
 377       return EVT(MVT::getVectorVT(MVT::i32, vEle));
 378     }
 379   }
 380 }
 381
 382 SDValue
 383 AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
 384   SDValue Chain = Op.getOperand(0);
 385   SDValue Cond  = Op.getOperand(1);
 386   SDValue Jump  = Op.getOperand(2);
 387   SDValue Result;
 388   Result = DAG.getNode(
 389       AMDGPUISD::BRANCH_COND,
 390       Op.getDebugLoc(),
 391       Op.getValueType(),
 392       Chain, Jump, Cond);
 393   return Result;
 394 }
 395
 396 SDValue
 397 AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
 398   DebugLoc DL = Op.getDebugLoc();
 399   EVT OVT = Op.getValueType();
 400   SDValue LHS = Op.getOperand(0);
 401   SDValue RHS = Op.getOperand(1);
 402   MVT INTTY;
 403   MVT FLTTY;
 404   if (!OVT.isVector()) {
 405     INTTY = MVT::i32;
 406     FLTTY = MVT::f32;
 407   } else if (OVT.getVectorNumElements() == 2) {
 408     INTTY = MVT::v2i32;
 409     FLTTY = MVT::v2f32;
 410   } else if (OVT.getVectorNumElements() == 4) {
 411     INTTY = MVT::v4i32;
 412     FLTTY = MVT::v4f32;
 413   }
 414   unsigned bitsize = OVT.getScalarType().getSizeInBits();
 415   // char|short jq = ia ^ ib;
 416   SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
 417
 418   // jq = jq >> (bitsize - 2)
 419   jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
 420
 421   // jq = jq | 0x1
 422   jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
 423
 424   // jq = (int)jq
 425   jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
 426
 427   // int ia = (int)LHS;
 428   SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
 429
 430   // int ib, (int)RHS;
 431   SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
 432
 433   // float fa = (float)ia;
 434   SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
 435
 436   // float fb = (float)ib;
 437   SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
 438
 439   // float fq = native_divide(fa, fb);
 440   SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
 441
 442   // fq = trunc(fq);
 443   fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
 444
 445   // float fqneg = -fq;
 446   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
 447
 448   // float fr = mad(fqneg, fb, fa);
 449   SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
 450       DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
 451
 452   // int iq = (int)fq;
 453   SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
 454
 455   // fr = fabs(fr);
 456   fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
 457
 458   // fb = fabs(fb);
 459   fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
 460
 461   // int cv = fr >= fb;
 462   SDValue cv;
 463   if (INTTY == MVT::i32) {
 464     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 465   } else {
 466     cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
 467   }
 468   // jq = (cv ? jq : 0);
 469   jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
 470       DAG.getConstant(0, OVT));
 471   // dst = iq + jq;
 472   iq = DAG.getSExtOrTrunc(iq, DL, OVT);
 473   iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
 474   return iq;
 475 }
 476
 477 SDValue
 478 AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
 479   DebugLoc DL = Op.getDebugLoc();
 480   EVT OVT = Op.getValueType();
 481   SDValue LHS = Op.getOperand(0);
 482   SDValue RHS = Op.getOperand(1);
 483   // The LowerSDIV32 function generates equivalent to the following IL.
 484   // mov r0, LHS
 485   // mov r1, RHS
 486   // ilt r10, r0, 0
 487   // ilt r11, r1, 0
 488   // iadd r0, r0, r10
 489   // iadd r1, r1, r11
 490   // ixor r0, r0, r10
 491   // ixor r1, r1, r11
 492   // udiv r0, r0, r1
 493   // ixor r10, r10, r11
 494   // iadd r0, r0, r10
 495   // ixor DST, r0, r10
 496
 497   // mov r0, LHS
 498   SDValue r0 = LHS;
 499
 500   // mov r1, RHS
 501   SDValue r1 = RHS;
 502
 503   // ilt r10, r0, 0
 504   SDValue r10 = DAG.getSelectCC(DL,
 505       r0, DAG.getConstant(0, OVT),
 506       DAG.getConstant(-1, MVT::i32),
 507       DAG.getConstant(0, MVT::i32),
 508       ISD::SETLT);
 509
 510   // ilt r11, r1, 0
 511   SDValue r11 = DAG.getSelectCC(DL,
 512       r1, DAG.getConstant(0, OVT),
 513       DAG.getConstant(-1, MVT::i32),
 514       DAG.getConstant(0, MVT::i32),
 515       ISD::SETLT);
 516
 517   // iadd r0, r0, r10
 518   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 519
 520   // iadd r1, r1, r11
 521   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 522
 523   // ixor r0, r0, r10
 524   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 525
 526   // ixor r1, r1, r11
 527   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 528
 529   // udiv r0, r0, r1
 530   r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
 531
 532   // ixor r10, r10, r11
 533   r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
 534
 535   // iadd r0, r0, r10
 536   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 537
 538   // ixor DST, r0, r10
 539   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 540   return DST;
 541 }
 542
 543 SDValue
 544 AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
 545   return SDValue(Op.getNode(), 0);
 546 }
 547
 548 SDValue
 549 AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
 550   DebugLoc DL = Op.getDebugLoc();
 551   EVT OVT = Op.getValueType();
 552   MVT INTTY = MVT::i32;
 553   if (OVT == MVT::v2i8) {
 554     INTTY = MVT::v2i32;
 555   } else if (OVT == MVT::v4i8) {
 556     INTTY = MVT::v4i32;
 557   }
 558   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 559   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 560   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 561   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 562   return LHS;
 563 }
 564
 565 SDValue
 566 AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
 567   DebugLoc DL = Op.getDebugLoc();
 568   EVT OVT = Op.getValueType();
 569   MVT INTTY = MVT::i32;
 570   if (OVT == MVT::v2i16) {
 571     INTTY = MVT::v2i32;
 572   } else if (OVT == MVT::v4i16) {
 573     INTTY = MVT::v4i32;
 574   }
 575   SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
 576   SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
 577   LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
 578   LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
 579   return LHS;
 580 }
 581
 582 SDValue
 583 AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
 584   DebugLoc DL = Op.getDebugLoc();
 585   EVT OVT = Op.getValueType();
 586   SDValue LHS = Op.getOperand(0);
 587   SDValue RHS = Op.getOperand(1);
 588   // The LowerSREM32 function generates equivalent to the following IL.
 589   // mov r0, LHS
 590   // mov r1, RHS
 591   // ilt r10, r0, 0
 592   // ilt r11, r1, 0
 593   // iadd r0, r0, r10
 594   // iadd r1, r1, r11
 595   // ixor r0, r0, r10
 596   // ixor r1, r1, r11
 597   // udiv r20, r0, r1
 598   // umul r20, r20, r1
 599   // sub r0, r0, r20
 600   // iadd r0, r0, r10
 601   // ixor DST, r0, r10
 602
 603   // mov r0, LHS
 604   SDValue r0 = LHS;
 605
 606   // mov r1, RHS
 607   SDValue r1 = RHS;
 608
 609   // ilt r10, r0, 0
 610   SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
 611
 612   // ilt r11, r1, 0
 613   SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
 614
 615   // iadd r0, r0, r10
 616   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 617
 618   // iadd r1, r1, r11
 619   r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
 620
 621   // ixor r0, r0, r10
 622   r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 623
 624   // ixor r1, r1, r11
 625   r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
 626
 627   // udiv r20, r0, r1
 628   SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
 629
 630   // umul r20, r20, r1
 631   r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
 632
 633   // sub r0, r0, r20
 634   r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
 635
 636   // iadd r0, r0, r10
 637   r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
 638
 639   // ixor DST, r0, r10
 640   SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
 641   return DST;
 642 }
 643
 644 SDValue
 645 AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
 646   return SDValue(Op.getNode(), 0);
 647 }