lib/Target/NVPTX/NVPTXInstrInfo.td

   1 //===- NVPTXInstrInfo.td - NVPTX Instruction defs -------------*- tblgen-*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file describes the PTX instructions in TableGen format.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 include "NVPTXInstrFormats.td"
  15
  16 // A NOP instruction: no operands, an empty asm string and no selection pattern.
  17 def NOP : NVPTXInst<(outs), (ins), "", []>;
  18
  19 // List of vector-specific properties: each def names one VecInstTypeEnum value.
  20 def isVecLD      : VecInstTypeEnum<1>;
  21 def isVecST      : VecInstTypeEnum<2>;
  22 def isVecBuild   : VecInstTypeEnum<3>;
  23 def isVecShuffle : VecInstTypeEnum<4>;
  24 def isVecExtract : VecInstTypeEnum<5>;
  25 def isVecInsert  : VecInstTypeEnum<6>;
  26 def isVecDest    : VecInstTypeEnum<7>;
  27 def isVecOther   : VecInstTypeEnum<15>;  // note: 15, not 8 -- the values are not contiguous
  28
  29 //===----------------------------------------------------------------------===//
  30 // NVPTX Operand Definitions.
  31 //===----------------------------------------------------------------------===//
  32
  33 def brtarget    : Operand<OtherVT>;  // operand of value type OtherVT; presumably a branch target -- confirm at use sites
  34
  35 //===----------------------------------------------------------------------===//
  36 // NVPTX Instruction Predicate Definitions
  37 //===----------------------------------------------------------------------===//
  38
  39
  40 def hasAtomRedG32 : Predicate<"Subtarget.hasAtomRedG32()">;  // each Predicate string is a C++ condition; this group queries Subtarget
  41 def hasAtomRedS32 : Predicate<"Subtarget.hasAtomRedS32()">;
  42 def hasAtomRedGen32 : Predicate<"Subtarget.hasAtomRedGen32()">;
  43 def useAtomRedG32forGen32 :  // true when hasAtomRedGen32() is false but hasAtomRedG32() is true
  44   Predicate<"!Subtarget.hasAtomRedGen32() && Subtarget.hasAtomRedG32()">;
  45 def hasBrkPt : Predicate<"Subtarget.hasBrkPt()">;
  46 def hasAtomRedG64 : Predicate<"Subtarget.hasAtomRedG64()">;
  47 def hasAtomRedS64 : Predicate<"Subtarget.hasAtomRedS64()">;
  48 def hasAtomRedGen64 : Predicate<"Subtarget.hasAtomRedGen64()">;
  49 def useAtomRedG64forGen64 :  // 64-bit counterpart of useAtomRedG32forGen32
  50   Predicate<"!Subtarget.hasAtomRedGen64() && Subtarget.hasAtomRedG64()">;
  51 def hasAtomAddF32 : Predicate<"Subtarget.hasAtomAddF32()">;
  52 def hasVote : Predicate<"Subtarget.hasVote()">;
  53 def hasDouble : Predicate<"Subtarget.hasDouble()">;
  54 def reqPTX20 : Predicate<"Subtarget.reqPTX20()">;
  55 def hasLDG : Predicate<"Subtarget.hasLDG()">;
  56 def hasLDU : Predicate<"Subtarget.hasLDU()">;
  57 def hasGenericLdSt : Predicate<"Subtarget.hasGenericLdSt()">;
  58
  59 def doF32FTZ : Predicate<"UseF32FTZ">;  // tests the C++ value UseF32FTZ (declared outside this file view)
  60
  61 def doFMAF32      : Predicate<"doFMAF32">;  // the do*/allow* predicates test C++ values of the same names
  62 def doFMAF32_ftz  : Predicate<"(doFMAF32 && UseF32FTZ)">;  // *_ftz variants additionally require UseF32FTZ
  63 def doFMAF32AGG      : Predicate<"doFMAF32AGG">;
  64 def doFMAF32AGG_ftz  : Predicate<"(doFMAF32AGG && UseF32FTZ)">;
  65 def doFMAF64      : Predicate<"doFMAF64">;
  66 def doFMAF64AGG      : Predicate<"doFMAF64AGG">;
  67 def doFMADF32     : Predicate<"doFMADF32">;
  68 def doFMADF32_ftz : Predicate<"(doFMADF32 && UseF32FTZ)">;
  69
  70 def doMulWide      : Predicate<"doMulWide">;
  71
  72 def allowFMA : Predicate<"allowFMA">;
  73 def allowFMA_ftz : Predicate<"(allowFMA && UseF32FTZ)">;
  74
  75 def do_DIVF32_APPROX : Predicate<"do_DIVF32_PREC==0">;  // keyed on do_DIVF32_PREC: 0 = APPROX, 1 = FULL
  76 def do_DIVF32_FULL : Predicate<"do_DIVF32_PREC==1">;
  77
  78 def do_SQRTF32_APPROX : Predicate<"do_SQRTF32_PREC==0">;  // keyed on do_SQRTF32_PREC: 0 = APPROX, 1 = RN
  79 def do_SQRTF32_RN : Predicate<"do_SQRTF32_PREC==1">;
  80
  81 def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
  82
  83 def true : Predicate<"1">;  // always-true predicate (condition is the literal 1)
  84
  85 //===----------------------------------------------------------------------===//
  86 // Special Handling for 8-bit Operands and Operations
  87 //
  88 // PTX supports 8-bit signed and unsigned types, but does not support 8-bit
  89 // operations (like add, shift, etc) except for ld/st/cvt. SASS does not have
  90 // 8-bit registers.
  91 //
  92 // PTX ld, st and cvt instructions permit source and destination data operands
  93 // to be wider than the instruction-type size, so that narrow values may be
  94 // loaded, stored, and converted using regular-width registers.
  95 //
  96 // So in PTX generation, we
  97 // - always use 16-bit registers in place of 8-bit registers.
  98 //   (8-bit variables should stay as 8-bit as they represent memory layout.)
  99 // - for the following 8-bit operations, we sign-ext/zero-ext the 8-bit values
 100 //   before operation
 101 //   . div
 102 //   . rem
 103 //   . neg (sign)
 104 //   . set, setp
 105 //   . shr
 106 //
 107 // We are patching the operations by inserting the cvt instructions in the
 108 // asm strings of the affected instructions.
 109 //
 110 // Since vector operations, except for ld/st, are eventually elementized, we
 111 // do not need to special-case the vector 8-bit operations.
 112 //
 113 //
 114 //===----------------------------------------------------------------------===//
 115
 116 // Handle_i8rr: asm string for an 8-bit reg/reg operation. Generates a block like
 117 // {
 118 //   .reg .s16 %temp1;
 119 //   .reg .s16 %temp2;
 120 //   cvt.s16.s8 %temp1, %a;
 121 //   cvt.s16.s8 %temp2, %b;
 122 //   opc.s16    %dst, %temp1, %temp2;
 123 // }
 124 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 (%a/%b/%dst stand for $a/$b/$dst)
 125 class Handle_i8rr<string OpcStr, string TypeStr, string CVTStr> {
 126   string s = !strconcat("{{\n\t",
 127              !strconcat(".reg .", !strconcat(TypeStr,
 128              !strconcat(" \t%temp1;\n\t",
 129              !strconcat(".reg .", !strconcat(TypeStr,
 130              !strconcat(" \t%temp2;\n\t",
 131              !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",  // widen operand $a into %temp1
 132              !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",  // widen operand $b into %temp2
 133              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}"))))))))))));  // "16" is appended to OpcStr here
 134 }
 135
 136 // Handle_i8ri: asm string for an 8-bit reg/imm operation. Generates a block like
 137 // {
 138 //   .reg .s16 %temp1;
 139 //   .reg .s16 %temp2;
 140 //   cvt.s16.s8 %temp1, %a;
 141 //   mov.b16    %temp2, %b;
 142 //   cvt.s16.s8 %temp2, %temp2;
 143 //   opc.s16    %dst, %temp1, %temp2;
 144 // }
 145 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 (%a/%b/%dst stand for $a/$b/$dst)
 146 class Handle_i8ri<string OpcStr, string TypeStr, string CVTStr> {
 147   string s = !strconcat("{{\n\t",
 148              !strconcat(".reg .", !strconcat(TypeStr,
 149              !strconcat(" \t%temp1;\n\t",
 150              !strconcat(".reg .",
 151              !strconcat(TypeStr, !strconcat(" \t%temp2;\n\t",
 152              !strconcat(CVTStr, !strconcat(" \t%temp1, $a;\n\t",  // widen the first operand ($a)
 153              !strconcat("mov.b16 \t%temp2, $b;\n\t",  // second operand ($b) is first moved into %temp2 ...
 154              !strconcat(CVTStr, !strconcat(" \t%temp2, %temp2;\n\t",  // ... and then widened in place
 155              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));  // "16" is appended to OpcStr here
 156 }
 157
 158 // Handle_i8ir: asm string for an 8-bit imm/reg operation. Generates a block like
 159 // {
 160 //   .reg .s16 %temp1;
 161 //   .reg .s16 %temp2;
 162 //   mov.b16    %temp1, %a;
 163 //   cvt.s16.s8 %temp1, %temp1;
 164 //   cvt.s16.s8 %temp2, %b;
 165 //   opc.s16    %dst, %temp1, %temp2;
 166 // }
 167 // when OpcStr=opc.s TypeStr=s16 CVTStr=cvt.s16.s8 (%a/%b/%dst stand for $a/$b/$dst)
 168 class Handle_i8ir<string OpcStr, string TypeStr, string CVTStr> {
 169   string s = !strconcat("{{\n\t",
 170              !strconcat(".reg .", !strconcat(TypeStr,
 171              !strconcat(" \t%temp1;\n\t",
 172              !strconcat(".reg .", !strconcat(TypeStr,
 173              !strconcat(" \t%temp2;\n\t",
 174              !strconcat("mov.b16 \t%temp1, $a;\n\t",  // first operand ($a) is first moved into %temp1 ...
 175              !strconcat(CVTStr, !strconcat(" \t%temp1, %temp1;\n\t",  // ... and then widened in place
 176              !strconcat(CVTStr, !strconcat(" \t%temp2, $b;\n\t",  // widen the second operand ($b)
 177              !strconcat(OpcStr, "16 \t$dst, %temp1, %temp2;\n\t}}")))))))))))));  // "16" is appended to OpcStr here
 178 }
 179
 180
 181 //===----------------------------------------------------------------------===//
 182 // Some Common Instruction Class Templates
 183 //===----------------------------------------------------------------------===//
 184
 185 multiclass I3<string OpcStr, SDNode OpNode> {  // reg/reg (rr) and reg/imm (ri) forms of OpNode for i64, i32, i16 and i8
 186   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 187                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 188                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
 189                        Int64Regs:$b))]>;
 190   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
 191                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 192                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
 193   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 194                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 195                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 196                        Int32Regs:$b))]>;
 197   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 198                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 199                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 200   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 201                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 202                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 203                        Int16Regs:$b))]>;
 204   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 205                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 206                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
 207   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 208                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),  // i8 forms emit the 16-bit opcode directly, with no cvt
 209                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
 210   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 211                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 212                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
 213 }
 214
 215 multiclass I3_i8<string OpcStr, SDNode OpNode, string TypeStr, string CVTStr> {  // rr/ri forms for i64..i8; i8 operands are widened via CVTStr first
 216   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 217                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 218                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
 219                        Int64Regs:$b))]>;
 220   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
 221                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 222                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
 223   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 224                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 225                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 226                        Int32Regs:$b))]>;
 227   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 228                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 229                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 230   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 231                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 232                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 233                        Int16Regs:$b))]>;
 234   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 235                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 236                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
 237   def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 238                      Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,  // asm block that converts both register operands before the 16-bit op
 239                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
 240   def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 241                      Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,  // same, with the immediate moved into a temp and then converted
 242                      [(set Int8Regs:$dst, (OpNode Int8Regs:$a, (imm):$b))]>;
 243 }
 244
 245 multiclass I3_noi8<string OpcStr, SDNode OpNode> {  // rr/ri forms of OpNode for i64, i32 and i16 only (no i8 defs)
 246   def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 247                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 248                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
 249                        Int64Regs:$b))]>;
 250   def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
 251                      !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
 252                      [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
 253   def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 254                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 255                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 256                        Int32Regs:$b))]>;
 257   def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 258                      !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
 259                      [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 260   def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 261                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 262                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 263                        Int16Regs:$b))]>;
 264   def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 265                      !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
 266                      [(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
 267 }
 268
 269 multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {  // i32-only rr/ri forms; ".s32" is appended to OpcStr
 270    def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
 271        Int32Regs:$b),
 272                       !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
 273                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 274                         Int32Regs:$b))]>;
 275    def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 276                       !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
 277                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 278 }
 279
 280 multiclass F3<string OpcStr, SDNode OpNode> {  // binary FP op: f64 and f32 (plain and .ftz), rr and ri; every def is predicated
 281    def f64rr : NVPTXInst<(outs Float64Regs:$dst),
 282                       (ins Float64Regs:$a, Float64Regs:$b),
 283                       !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
 284                       [(set Float64Regs:$dst,
 285                         (OpNode Float64Regs:$a, Float64Regs:$b))]>,
 286                       Requires<[allowFMA]>;
 287    def f64ri : NVPTXInst<(outs Float64Regs:$dst),
 288                       (ins Float64Regs:$a, f64imm:$b),
 289                       !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
 290                       [(set Float64Regs:$dst,
 291                         (OpNode Float64Regs:$a, fpimm:$b))]>,
 292                       Requires<[allowFMA]>;
 293    def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 294                       (ins Float32Regs:$a, Float32Regs:$b),
 295                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
 296                       [(set Float32Regs:$dst,
 297                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
 298                       Requires<[allowFMA_ftz]>;  // .ftz forms need allowFMA_ftz; the others need allowFMA
 299    def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
 300                       (ins Float32Regs:$a, f32imm:$b),
 301                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
 302                       [(set Float32Regs:$dst,
 303                         (OpNode Float32Regs:$a, fpimm:$b))]>,
 304                       Requires<[allowFMA_ftz]>;
 305    def f32rr : NVPTXInst<(outs Float32Regs:$dst),
 306                       (ins Float32Regs:$a, Float32Regs:$b),
 307                       !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
 308                       [(set Float32Regs:$dst,
 309                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
 310                       Requires<[allowFMA]>;
 311    def f32ri : NVPTXInst<(outs Float32Regs:$dst),
 312                       (ins Float32Regs:$a, f32imm:$b),
 313                       !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
 314                       [(set Float32Regs:$dst,
 315                         (OpNode Float32Regs:$a, fpimm:$b))]>,
 316                       Requires<[allowFMA]>;
 317 }
 318
 319 multiclass F3_rn<string OpcStr, SDNode OpNode> {  // binary FP op with the ".rn" modifier: f64 and f32 (plain and .ftz), rr and ri
 320    def f64rr : NVPTXInst<(outs Float64Regs:$dst),
 321                       (ins Float64Regs:$a, Float64Regs:$b),
 322                       !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
 323                       [(set Float64Regs:$dst,
 324                         (OpNode Float64Regs:$a, Float64Regs:$b))]>;
 325    def f64ri : NVPTXInst<(outs Float64Regs:$dst),
 326                       (ins Float64Regs:$a, f64imm:$b),
 327                       !strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
 328                       [(set Float64Regs:$dst,
 329                         (OpNode Float64Regs:$a, fpimm:$b))]>;
 330    def f32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 331                       (ins Float32Regs:$a, Float32Regs:$b),
 332                       !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
 333                       [(set Float32Regs:$dst,
 334                         (OpNode Float32Regs:$a, Float32Regs:$b))]>,
 335                       Requires<[doF32FTZ]>;  // only the .ftz forms carry a predicate in this multiclass
 336    def f32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
 337                       (ins Float32Regs:$a, f32imm:$b),
 338                       !strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
 339                       [(set Float32Regs:$dst,
 340                         (OpNode Float32Regs:$a, fpimm:$b))]>,
 341                       Requires<[doF32FTZ]>;
 342    def f32rr : NVPTXInst<(outs Float32Regs:$dst),
 343                       (ins Float32Regs:$a, Float32Regs:$b),
 344                       !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
 345                       [(set Float32Regs:$dst,
 346                         (OpNode Float32Regs:$a, Float32Regs:$b))]>;
 347    def f32ri : NVPTXInst<(outs Float32Regs:$dst),
 348                       (ins Float32Regs:$a, f32imm:$b),
 349                       !strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
 350                       [(set Float32Regs:$dst,
 351                         (OpNode Float32Regs:$a, fpimm:$b))]>;
 352 }
 353
 354 multiclass F2<string OpcStr, SDNode OpNode> {  // unary FP op: f64, f32 and f32 with .ftz
 355    def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
 356                       !strconcat(OpcStr, ".f64 \t$dst, $a;"),
 357                       [(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
 358    def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
 359                       !strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
 360                       [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
 361                       Requires<[doF32FTZ]>;  // the .ftz form is the only predicated def
 362    def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
 363                       !strconcat(OpcStr, ".f32 \t$dst, $a;"),
 364                       [(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
 365 }
 366
 367 //===----------------------------------------------------------------------===//
 368 // NVPTX Instructions.
 369 //===----------------------------------------------------------------------===//
 370
 371 //-----------------------------------
 372 // Integer Arithmetic
 373 //-----------------------------------
 374
 375 multiclass ADD_SUB_i1<SDNode OpNode> {  // 1-bit add/sub: both forms emit xor.pred regardless of OpNode
 376    def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
 377           "xor.pred \t$dst, $a, $b;",  // on a single bit, addition and subtraction both reduce to xor
 378       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
 379    def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
 380           "xor.pred \t$dst, $a, $b;",
 381       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
 382 }
 383
 384 defm ADD_i1 : ADD_SUB_i1<add>;  // i1 add and sub share the same xor.pred multiclass
 385 defm SUB_i1 : ADD_SUB_i1<sub>;
 386
 387
 388 defm ADD : I3<"add.s", add>;  // I3 appends the bit width, giving add.s64/.s32/.s16
 389 defm SUB : I3<"sub.s", sub>;
 390
 391 defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;  // addc/subc nodes -> add.cc.s32 / sub.cc.s32
 392 defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
 393
 394 defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;  // adde/sube nodes -> addc.cc.s32 / subc.cc.s32
 395 defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
 396
 397 // mul.wide PTX instruction: immediate predicates, transforms, defs and patterns
 398 def SInt32Const : PatLeaf<(imm), [{
 399   // Matches immediates representable as a signed 32-bit integer.
 400   const APInt &Imm = N->getAPIntValue();
 401   const bool FitsSigned32 = Imm.isSignedIntN(32);
 402   return FitsSigned32;
 403 }]>;
 404
 405 def UInt32Const : PatLeaf<(imm), [{
 406   // Matches immediates representable as an unsigned 32-bit integer.
 407   const APInt &Imm = N->getAPIntValue();
 408   const bool FitsUnsigned32 = Imm.isIntN(32);
 409   return FitsUnsigned32;
 410 }]>;
 411
 412 def SInt16Const : PatLeaf<(imm), [{
 413   // Matches immediates representable as a signed 16-bit integer.
 414   const APInt &Imm = N->getAPIntValue();
 415   const bool FitsSigned16 = Imm.isSignedIntN(16);
 416   return FitsSigned16;
 417 }]>;
 418
 419 def UInt16Const : PatLeaf<(imm), [{
 420   // Matches immediates representable as an unsigned 16-bit integer.
 421   const APInt &Imm = N->getAPIntValue();
 422   const bool FitsUnsigned16 = Imm.isIntN(16);
 423   return FitsUnsigned16;
 424 }]>;
 425
 426 def Int5Const : PatLeaf<(imm), [{
 427   const APInt &v = N->getAPIntValue();
 428   // Check if 0 <= v < 31 (not 32): SHL2MUL32 turns v into the 32-bit constant
 429   // 1 << v, and 1 << 31 has the sign bit set, i.e. is negative for mul.wide.s32.
 430   if (v.sge(0) && v.slt(31))
 431     return true;
 432   return false;
 433 }]>;
 434
 435 def Int4Const : PatLeaf<(imm), [{
 436   const APInt &v = N->getAPIntValue();
 437   // Check if 0 <= v < 15 (not 16): SHL2MUL16 turns v into the 16-bit constant
 438   // 1 << v, and 1 << 15 has the sign bit set, i.e. is negative for mul.wide.s16.
 439   if (v.sge(0) && v.slt(15))
 440     return true;
 441   return false;
 442 }]>;
 443
 444 def SHL2MUL32 : SDNodeXForm<imm, [{
 445   // Turn a shift amount into the equivalent multiplier, 1 << amount, as an i32.
 446   const APInt Multiplier = APInt(32, 1).shl(N->getAPIntValue());
 447   return CurDAG->getTargetConstant(Multiplier, MVT::i32);
 448 }]>;
 449
 450 def SHL2MUL16 : SDNodeXForm<imm, [{
 451   // Turn a shift amount into the equivalent multiplier, 1 << amount, as an i16.
 452   const APInt Multiplier = APInt(16, 1).shl(N->getAPIntValue());
 453   return CurDAG->getTargetConstant(Multiplier, MVT::i16);
 454 }]>;
 455
 456 def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),  // signed 32x32 -> 64 multiply
 457                            (ins Int32Regs:$a, Int32Regs:$b),
 458                            "mul.wide.s32 \t$dst, $a, $b;", []>;  // empty pattern list: not matched from this def itself
 459 def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
 460                             (ins Int32Regs:$a, i64imm:$b),
 461                            "mul.wide.s32 \t$dst, $a, $b;", []>;
 462
 463 def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),  // unsigned 32x32 -> 64 multiply
 464                            (ins Int32Regs:$a, Int32Regs:$b),
 465                            "mul.wide.u32 \t$dst, $a, $b;", []>;
 466 def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
 467                             (ins Int32Regs:$a, i64imm:$b),
 468                            "mul.wide.u32 \t$dst, $a, $b;", []>;
 469
 470 def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),  // signed 16x16 -> 32 multiply
 471                             (ins Int16Regs:$a, Int16Regs:$b),
 472                            "mul.wide.s16 \t$dst, $a, $b;", []>;
 473 def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
 474                             (ins Int16Regs:$a, i32imm:$b),
 475                            "mul.wide.s16 \t$dst, $a, $b;", []>;
 476
 477 def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),  // unsigned 16x16 -> 32 multiply
 478                             (ins Int16Regs:$a, Int16Regs:$b),
 479                            "mul.wide.u16 \t$dst, $a, $b;", []>;
 480 def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
 481                             (ins Int16Regs:$a, i32imm:$b),
 482                            "mul.wide.u16 \t$dst, $a, $b;", []>;
 483
 484 def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),  // shift of an extended value by a constant -> mul.wide by (1 << b)
 485           (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
 486           Requires<[doMulWide]>;  // every pattern in this group is gated on doMulWide
 487 def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
 488           (MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
 489           Requires<[doMulWide]>;
 490
 491 def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
 492           (MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
 493           Requires<[doMulWide]>;
 494 def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
 495           (MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
 496           Requires<[doMulWide]>;
 497
 498 def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),  // mul of two sign-extended i32 values -> mul.wide.s32
 499           (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
 500           Requires<[doMulWide]>;
 501 def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),  // immediate form: constant must fit in signed 32 bits
 502           (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
 503           Requires<[doMulWide]>;
 504
 505 def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),  // mul of two zero-extended i32 values -> mul.wide.u32
 506           (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
 507 def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),  // immediate form: constant must fit in unsigned 32 bits
 508           (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
 509           Requires<[doMulWide]>;
 510
 511 def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),  // 16-bit counterparts of the four patterns above
 512           (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
 513 def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
 514           (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
 515           Requires<[doMulWide]>;
 516
 517 def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
 518           (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
 519 def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
 520           (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
 521           Requires<[doMulWide]>;
 522
 523 defm MULT : I3<"mul.lo.s", mul>;  // low half of the product, for i64/i32/i16/i8
 524
 525 defm MULTHS : I3_noi8<"mul.hi.s", mulhs>;  // high half, signed; the i8 forms are defined by hand below
 526 defm MULTHU : I3_noi8<"mul.hi.u", mulhu>;  // high half, unsigned; the i8 forms are defined by hand below
 527 def MULTHSi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 528             !strconcat("{{ \n\t",  // signed i8 mulhs (reg, reg), done in 16-bit temporaries
 529             !strconcat(".reg \t.s16 temp1; \n\t",
 530             !strconcat(".reg \t.s16 temp2; \n\t",
 531             !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
 532             !strconcat("cvt.s16.s8 \ttemp2, $b; \n\t",
 533             !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",  // 16-bit product of the widened operands
 534             !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",  // shift right by 8 to keep the upper byte of that product
 535             !strconcat("}}", "")))))))),
 536       [(set Int8Regs:$dst, (mulhs Int8Regs:$a, Int8Regs:$b))]>;
 537 def MULTHSi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 538             !strconcat("{{ \n\t",  // signed i8 mulhs (reg, imm), done in 16-bit temporaries
 539             !strconcat(".reg \t.s16 temp1; \n\t",
 540             !strconcat(".reg \t.s16 temp2; \n\t",
 541             !strconcat("cvt.s16.s8 \ttemp1, $a; \n\t",
 542             !strconcat("mov.b16 \ttemp2, $b; \n\t",  // the immediate is moved into temp2 first ...
 543             !strconcat("cvt.s16.s8 \ttemp2, temp2; \n\t",  // ... and then converted in place
 544             !strconcat("mul.lo.s16 \t$dst, temp1, temp2; \n\t",
 545             !strconcat("shr.s16 \t$dst, $dst, 8; \n\t",  // shift right by 8 to keep the upper byte of the product
 546             !strconcat("}}", ""))))))))),
 547       [(set Int8Regs:$dst, (mulhs Int8Regs:$a, imm:$b))]>;
 548 def MULTHUi8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 549             !strconcat("{{ \n\t",  // unsigned i8 mulhu (reg, reg), done in 16-bit temporaries
 550             !strconcat(".reg \t.u16 temp1; \n\t",
 551             !strconcat(".reg \t.u16 temp2; \n\t",
 552             !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
 553             !strconcat("cvt.u16.u8 \ttemp2, $b; \n\t",
 554             !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",  // 16-bit product of the widened operands
 555             !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",  // shift right by 8 to keep the upper byte of that product
 556             !strconcat("}}", "")))))))),
 557       [(set Int8Regs:$dst, (mulhu Int8Regs:$a, Int8Regs:$b))]>;
 558 def MULTHUi8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 559             !strconcat("{{ \n\t",  // unsigned i8 mulhu (reg, imm), done in 16-bit temporaries
 560             !strconcat(".reg \t.u16 temp1; \n\t",
 561             !strconcat(".reg \t.u16 temp2; \n\t",
 562             !strconcat("cvt.u16.u8 \ttemp1, $a; \n\t",
 563             !strconcat("mov.b16 \ttemp2, $b; \n\t",  // the immediate is moved into temp2 first ...
 564             !strconcat("cvt.u16.u8 \ttemp2, temp2; \n\t",  // ... and then converted in place
 565             !strconcat("mul.lo.u16 \t$dst, temp1, temp2; \n\t",
 566             !strconcat("shr.u16 \t$dst, $dst, 8; \n\t",  // shift right by 8 to keep the upper byte of the product
 567             !strconcat("}}", ""))))))))),
 568       [(set Int8Regs:$dst, (mulhu Int8Regs:$a, imm:$b))]>;
 569
 570
 571 defm SDIV : I3_i8<"div.s", sdiv, "s16", "cvt.s16.s8">;  // i8 operands are sign-extended before the 16-bit op
 572 defm UDIV : I3_i8<"div.u", udiv, "u16", "cvt.u16.u8">;  // i8 operands are zero-extended before the 16-bit op
 573
 574 defm SREM : I3_i8<"rem.s", srem, "s16", "cvt.s16.s8">;
 575 // The SREM ri version will not be selected as DAGCombiner::visitSREM will lower it.
 576 defm UREM : I3_i8<"rem.u", urem, "u16", "cvt.u16.u8">;
 577 // The UREM ri version will not be selected as DAGCombiner::visitUREM will lower it.
 578
 579 def MAD8rrr : NVPTXInst<(outs Int8Regs:$dst),  // i8 multiply-add: matches (add (mul a, b), c)
 580                       (ins Int8Regs:$a, Int8Regs:$b, Int8Regs:$c),
 581                       "mad.lo.s16 \t$dst, $a, $b, $c;",  // the i8 forms emit the 16-bit mad.lo
 582                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
 583                         Int8Regs:$c))]>;
 584 def MAD8rri : NVPTXInst<(outs Int8Regs:$dst),  // suffix letters: r = register, i = immediate, for $a, $b, $c in order
 585                       (ins Int8Regs:$a, Int8Regs:$b, i8imm:$c),
 586                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 587                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, Int8Regs:$b),
 588                         imm:$c))]>;
 589 def MAD8rir : NVPTXInst<(outs Int8Regs:$dst),
 590                       (ins Int8Regs:$a, i8imm:$b, Int8Regs:$c),
 591                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 592                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
 593                         Int8Regs:$c))]>;
 594 def MAD8rii : NVPTXInst<(outs Int8Regs:$dst),
 595                       (ins Int8Regs:$a, i8imm:$b, i8imm:$c),
 596                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 597                       [(set Int8Regs:$dst, (add (mul Int8Regs:$a, imm:$b),
 598                         imm:$c))]>;
 599
 600 def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),  // i16 multiply-add: matches (add (mul a, b), c)
 601                       (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
 602                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 603                       [(set Int16Regs:$dst, (add
 604                         (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
 605 def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),  // suffix letters: r = register, i = immediate, for $a, $b, $c in order
 606                       (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
 607                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 608                       [(set Int16Regs:$dst, (add
 609                         (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
 610 def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
 611                       (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
 612                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 613                       [(set Int16Regs:$dst, (add
 614                         (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
 615 def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
 616     (ins Int16Regs:$a, i16imm:$b, i16imm:$c),
 617                       "mad.lo.s16 \t$dst, $a, $b, $c;",
 618                       [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
 619                         imm:$c))]>;
 620
 621 def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),  // i32 multiply-add: matches (add (mul a, b), c)
 622                       (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
 623                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 624                       [(set Int32Regs:$dst, (add
 625                         (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
 626 def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),  // suffix letters: r = register, i = immediate, for $a, $b, $c in order
 627                       (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
 628                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 629                       [(set Int32Regs:$dst, (add
 630                         (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
 631 def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
 632                       (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
 633                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 634                       [(set Int32Regs:$dst, (add
 635                         (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
 636 def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
 637                       (ins Int32Regs:$a, i32imm:$b, i32imm:$c),
 638                       "mad.lo.s32 \t$dst, $a, $b, $c;",
 639                       [(set Int32Regs:$dst, (add
 640                         (mul Int32Regs:$a, imm:$b), imm:$c))]>;
 641
     // MAD64*: 64-bit integer multiply-add. Every form prints "mad.lo.s64" and
     // carries the pattern (add (mul $a, $b), $c). The suffix gives the kind of
     // $a, $b, $c in order: r = Int64Regs, i = i64imm.
     // rrr: all three sources are registers.
 642 def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
 643                       (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
 644                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 645                       [(set Int64Regs:$dst, (add
 646                         (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
     // rri: only $c is an immediate.
 647 def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
 648                       (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
 649                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 650                       [(set Int64Regs:$dst, (add
 651                         (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
     // rir: only $b is an immediate.
 652 def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
 653                       (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
 654                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 655                       [(set Int64Regs:$dst, (add
 656                         (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
     // rii: both $b and $c are immediates.
 657 def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
 658                       (ins Int64Regs:$a, i64imm:$b, i64imm:$c),
 659                       "mad.lo.s64 \t$dst, $a, $b, $c;",
 660                       [(set Int64Regs:$dst, (add
 661                         (mul Int64Regs:$a, imm:$b), imm:$c))]>;
 662
 663
     // Integer negation, one record per register class, each with the pattern
     // (ineg $src).
     // INEG8 prints two instructions: "cvt.s16.s8" from $src into $dst, followed
     // by "neg.s16" applied to $dst in place.
 664 def INEG8 : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
 665                      !strconcat("cvt.s16.s8 \t$dst, $src;\n\t",
 666                                  "neg.s16 \t$dst, $dst;"),
 667          [(set Int8Regs:$dst, (ineg Int8Regs:$src))]>;
     // The wider forms print a single "neg.sNN" from $src to $dst.
 668 def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
 669                      "neg.s16 \t$dst, $src;",
 670          [(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
 671 def INEG32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
 672                      "neg.s32 \t$dst, $src;",
 673          [(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
 674 def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
 675                      "neg.s64 \t$dst, $src;",
 676          [(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
 677
 678 //-----------------------------------
 679 // Floating Point Arithmetic
 680 //-----------------------------------
 681
 682 // Constant 1.0f
     // Pattern leaf over fpimm. The predicate rejects any node whose APFloat
     // semantics are not IEEEsingle; otherwise it converts the value to a C++
     // float and accepts only an exact comparison with 1.0f.
 683 def FloatConst1 : PatLeaf<(fpimm), [{
 684     if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEsingle)
 685       return false;
 686     float f = (float)N->getValueAPF().convertToFloat();
 687     return (f==1.0f);
 688 }]>;
 689 // Constant (double)1.0
     // Pattern leaf over fpimm. The predicate rejects any node whose APFloat
     // semantics are not IEEEdouble; otherwise it converts the value to a C++
     // double and accepts only an exact comparison with 1.0.
 690 def DoubleConst1 : PatLeaf<(fpimm), [{
 691     if (&(N->getValueAPF().getSemantics()) != &llvm::APFloat::IEEEdouble)
 692       return false;
 693     double d = (double)N->getValueAPF().convertToDouble();
 694     return (d==1.0);
 695 }]>;
 696
     // Basic floating-point arithmetic, instantiated from the F3, F3_rn and F2
     // multiclasses. Those multiclasses are defined outside this part of the
     // file; each instantiation passes a mnemonic string and the SDNode to match.
     // Two-source operations: add, sub, mul.
 697 defm FADD : F3<"add", fadd>;
 698 defm FSUB : F3<"sub", fsub>;
 699 defm FMUL : F3<"mul", fmul>;
 700
     // The same three operations through F3_rn.
     // NOTE(review): presumably the explicitly round-to-nearest variants;
     // confirm against the F3_rn definition.
 701 defm FADD_rn : F3_rn<"add", fadd>;
 702 defm FSUB_rn : F3_rn<"sub", fsub>;
 703 defm FMUL_rn : F3_rn<"mul", fmul>;
 704
     // Single-source operations: abs, neg, and sqrt (mnemonic "sqrt.rn").
 705 defm FABS : F2<"abs", fabs>;
 706 defm FNEG : F2<"neg", fneg>;
 707 defm FSQRT : F2<"sqrt.rn", fsqrt>;
 708
 709 //
 710 // F64 division
 711 //
     // FDIV641r: matches (fdiv DoubleConst1, $b), i.e. a numerator that is
     // exactly 1.0, and prints "rcp.rn.f64" using only $b. $a is listed as an
     // input operand but does not appear in the printed text.
 712 def FDIV641r : NVPTXInst<(outs Float64Regs:$dst),
 713                       (ins f64imm:$a, Float64Regs:$b),
 714                       "rcp.rn.f64 \t$dst, $b;",
 715                       [(set Float64Regs:$dst,
 716                         (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
     // FDIV64rr: register / register, printed as "div.rn.f64".
 717 def FDIV64rr : NVPTXInst<(outs Float64Regs:$dst),
 718                       (ins Float64Regs:$a, Float64Regs:$b),
 719                       "div.rn.f64 \t$dst, $a, $b;",
 720                       [(set Float64Regs:$dst,
 721                         (fdiv Float64Regs:$a, Float64Regs:$b))]>;
     // FDIV64ri: register / floating-point immediate, printed as "div.rn.f64".
 722 def FDIV64ri : NVPTXInst<(outs Float64Regs:$dst),
 723                       (ins Float64Regs:$a, f64imm:$b),
 724                       "div.rn.f64 \t$dst, $a, $b;",
 725                       [(set Float64Regs:$dst,
 726                         (fdiv Float64Regs:$a, fpimm:$b))]>;
 727
 728 //
 729 // F32 Approximate reciprocal
 730 //
     // Both records match (fdiv FloatConst1, $b) and print an "rcp.approx"
     // instruction that uses only $b. They differ in the printed opcode
     // (".ftz" or not) and in the predicate list: the _ftz record additionally
     // requires doF32FTZ. The predicates are defined outside this part of the
     // file.
 731 def FDIV321r_ftz : NVPTXInst<(outs Float32Regs:$dst),
 732                       (ins f32imm:$a, Float32Regs:$b),
 733                       "rcp.approx.ftz.f32 \t$dst, $b;",
 734                       [(set Float32Regs:$dst,
 735                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 736                       Requires<[do_DIVF32_APPROX, doF32FTZ]>;
 737 def FDIV321r : NVPTXInst<(outs Float32Regs:$dst),
 738                         (ins f32imm:$a, Float32Regs:$b),
 739                        "rcp.approx.f32 \t$dst, $b;",
 740                       [(set Float32Regs:$dst,
 741                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 742                       Requires<[do_DIVF32_APPROX]>;
 743 //
 744 // F32 Approximate division
 745 //
     // Register / register division printed as "div.approx[.ftz].f32". Both
     // records require do_DIVF32_APPROX; the _ftz record additionally requires
     // doF32FTZ. No register / immediate form is defined in this group.
 746 def FDIV32approxrr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 747                       (ins Float32Regs:$a, Float32Regs:$b),
 748                       "div.approx.ftz.f32 \t$dst, $a, $b;",
 749                       [(set Float32Regs:$dst,
 750                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 751                       Requires<[do_DIVF32_APPROX, doF32FTZ]>;
 752 def FDIV32approxrr     : NVPTXInst<(outs Float32Regs:$dst),
 753                       (ins Float32Regs:$a, Float32Regs:$b),
 754                       "div.approx.f32 \t$dst, $a, $b;",
 755                       [(set Float32Regs:$dst,
 756                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 757                       Requires<[do_DIVF32_APPROX]>;
 758 //
 759 // F32 Semi-accurate reciprocal
 760 //
 761 // rcp.approx gives the same result as div.full(1.0f, a) and is faster.
 762 //
     // Both records match (fdiv FloatConst1, $b) under do_DIVF32_FULL and print
     // the same "rcp.approx[.ftz].f32" text as the approximate-reciprocal
     // records; only the predicate list differs. The _ftz record additionally
     // requires doF32FTZ.
 763 def FDIV321r_approx_ftz : NVPTXInst<(outs Float32Regs:$dst),
 764                       (ins f32imm:$a, Float32Regs:$b),
 765                       "rcp.approx.ftz.f32 \t$dst, $b;",
 766                       [(set Float32Regs:$dst,
 767                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 768                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
 769 def FDIV321r_approx : NVPTXInst<(outs Float32Regs:$dst),
 770                       (ins f32imm:$a, Float32Regs:$b),
 771                       "rcp.approx.f32 \t$dst, $b;",
 772                       [(set Float32Regs:$dst,
 773                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 774                       Requires<[do_DIVF32_FULL]>;
 775 //
 776 // F32 Semi-accurate division
 777 //
     // Division printed as "div.full[.ftz].f32", all guarded by do_DIVF32_FULL.
     // The rr forms divide register by register; the ri forms divide a register
     // by a floating-point immediate. The _ftz forms additionally require
     // doF32FTZ.
 778 def FDIV32rr_ftz : NVPTXInst<(outs Float32Regs:$dst),
 779                       (ins Float32Regs:$a, Float32Regs:$b),
 780                       "div.full.ftz.f32 \t$dst, $a, $b;",
 781                       [(set Float32Regs:$dst,
 782                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 783                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
 784 def FDIV32ri_ftz : NVPTXInst<(outs Float32Regs:$dst),
 785                       (ins Float32Regs:$a, f32imm:$b),
 786                       "div.full.ftz.f32 \t$dst, $a, $b;",
 787                       [(set Float32Regs:$dst,
 788                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 789                       Requires<[do_DIVF32_FULL, doF32FTZ]>;
 790 def FDIV32rr : NVPTXInst<(outs Float32Regs:$dst),
 791                       (ins Float32Regs:$a, Float32Regs:$b),
 792                       "div.full.f32 \t$dst, $a, $b;",
 793                       [(set Float32Regs:$dst,
 794                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 795                       Requires<[do_DIVF32_FULL]>;
 796 def FDIV32ri : NVPTXInst<(outs Float32Regs:$dst),
 797                       (ins Float32Regs:$a, f32imm:$b),
 798                       "div.full.f32 \t$dst, $a, $b;",
 799                       [(set Float32Regs:$dst,
 800                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 801                       Requires<[do_DIVF32_FULL]>;
 802 //
 803 // F32 Accurate reciprocal
 804 //
     // Both records match (fdiv FloatConst1, $b) and print "rcp.rn[.ftz].f32"
     // using only $b. They are guarded by reqPTX20 rather than by one of the
     // do_DIVF32_* predicates; the _ftz record additionally requires doF32FTZ.
 805 def FDIV321r_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
 806                         (ins f32imm:$a, Float32Regs:$b),
 807                        "rcp.rn.ftz.f32 \t$dst, $b;",
 808                       [(set Float32Regs:$dst,
 809                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 810                       Requires<[reqPTX20, doF32FTZ]>;
 811 def FDIV321r_prec : NVPTXInst<(outs Float32Regs:$dst),
 812                       (ins f32imm:$a, Float32Regs:$b),
 813                        "rcp.rn.f32 \t$dst, $b;",
 814                       [(set Float32Regs:$dst,
 815                         (fdiv FloatConst1:$a, Float32Regs:$b))]>,
 816                       Requires<[reqPTX20]>;
 817 //
 818 // F32 Accurate division
 819 //
     // Division printed as "div.rn[.ftz].f32", all guarded by reqPTX20. The rr
     // forms divide register by register; the ri forms divide a register by a
     // floating-point immediate. The _ftz forms additionally require doF32FTZ.
 820 def FDIV32rr_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
 821                       (ins Float32Regs:$a, Float32Regs:$b),
 822                       "div.rn.ftz.f32 \t$dst, $a, $b;",
 823                       [(set Float32Regs:$dst,
 824                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 825                       Requires<[doF32FTZ, reqPTX20]>;
 826 def FDIV32ri_prec_ftz : NVPTXInst<(outs Float32Regs:$dst),
 827                       (ins Float32Regs:$a, f32imm:$b),
 828                       "div.rn.ftz.f32 \t$dst, $a, $b;",
 829                       [(set Float32Regs:$dst,
 830                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 831                       Requires<[doF32FTZ, reqPTX20]>;
 832 def FDIV32rr_prec : NVPTXInst<(outs Float32Regs:$dst),
 833                       (ins Float32Regs:$a, Float32Regs:$b),
 834                       "div.rn.f32 \t$dst, $a, $b;",
 835                       [(set Float32Regs:$dst,
 836                         (fdiv Float32Regs:$a, Float32Regs:$b))]>,
 837                       Requires<[reqPTX20]>;
 838 def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
 839                       (ins Float32Regs:$a, f32imm:$b),
 840                       "div.rn.f32 \t$dst, $a, $b;",
 841                       [(set Float32Regs:$dst,
 842                         (fdiv Float32Regs:$a, fpimm:$b))]>,
 843                       Requires<[reqPTX20]>;
 844
 845
     // FPCONTRACT32: 32-bit fused multiply-add family.
     //   OpcStr - mnemonic placed in front of " \t$dst, $a, $b, $c;".
     //   Pred   - predicate attached to every record through Requires<[Pred]>.
     // Every record matches an fadd whose operand is (fmul $a, $b). The suffix
     // gives the kind of $a, $b, $c in order: r = Float32Regs, i = f32imm.
 846 multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
        // rrr: (fadd (fmul $a, $b), $c), product on the left.
 847    def rrr : NVPTXInst<(outs Float32Regs:$dst),
 848                       (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
 849                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 850                       [(set Float32Regs:$dst, (fadd
 851                         (fmul Float32Regs:$a, Float32Regs:$b),
 852                         Float32Regs:$c))]>, Requires<[Pred]>;
 853    // This is to WAR a weird bug in Tablegen that does not automatically
 854    // generate the following permuted rule rrr2 from the above rrr.
 855    // So we explicitly add it here. This happens to FMA32 only.
 856    // See the comments at FMAD32 and FMA32 for more information.
        // rrr2: (fadd $c, (fmul $a, $b)), product on the right; same operands
        // and same printed text as rrr.
 857    def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
 858                         (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
 859                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 860                       [(set Float32Regs:$dst, (fadd Float32Regs:$c,
 861                         (fmul Float32Regs:$a, Float32Regs:$b)))]>,
 862                       Requires<[Pred]>;
        // rri: the addend $c is an immediate.
 863    def rri : NVPTXInst<(outs Float32Regs:$dst),
 864                       (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
 865                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 866                       [(set Float32Regs:$dst, (fadd
 867                         (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
 868                       Requires<[Pred]>;
        // rir: the multiplier $b is an immediate.
 869    def rir : NVPTXInst<(outs Float32Regs:$dst),
 870                       (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
 871                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 872                       [(set Float32Regs:$dst, (fadd
 873                         (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
 874                       Requires<[Pred]>;
        // rii: both $b and $c are immediates.
 875    def rii : NVPTXInst<(outs Float32Regs:$dst),
 876                       (ins Float32Regs:$a, f32imm:$b, f32imm:$c),
 877                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 878                       [(set Float32Regs:$dst, (fadd
 879                         (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
 880                       Requires<[Pred]>;
 881 }
 882
     // FPCONTRACT64: 64-bit fused multiply-add family.
     //   OpcStr - mnemonic placed in front of " \t$dst, $a, $b, $c;".
     //   Pred   - predicate attached to every record through Requires<[Pred]>.
     // Every record matches (fadd (fmul $a, $b), $c). The suffix gives the kind
     // of $a, $b, $c in order: r = Float64Regs, i = f64imm. Unlike FPCONTRACT32,
     // this multiclass has no explicit rrr2 (addend-first) record.
 883 multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
        // rrr: all three sources are registers.
 884    def rrr : NVPTXInst<(outs Float64Regs:$dst),
 885                       (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
 886                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 887                       [(set Float64Regs:$dst, (fadd
 888                         (fmul Float64Regs:$a, Float64Regs:$b),
 889                         Float64Regs:$c))]>, Requires<[Pred]>;
        // rri: the addend $c is an immediate.
 890    def rri : NVPTXInst<(outs Float64Regs:$dst),
 891                       (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
 892                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 893                       [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
 894                         Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
        // rir: the multiplier $b is an immediate.
 895    def rir : NVPTXInst<(outs Float64Regs:$dst),
 896                       (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
 897                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 898                       [(set Float64Regs:$dst, (fadd
 899                         (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
 900                       Requires<[Pred]>;
        // rii: both $b and $c are immediates.
 901    def rii : NVPTXInst<(outs Float64Regs:$dst),
 902                       (ins Float64Regs:$a, f64imm:$b, f64imm:$c),
 903                       !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
 904                       [(set Float64Regs:$dst, (fadd
 905                         (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
 906                       Requires<[Pred]>;
 907 }
 908
 909 // Due to an unknown reason (most likely a bug in tablegen), tablegen does not
 910 // automatically generate the rrr2 rule from
 911 // the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
 912 // If we reverse the order of the following two lines, then the rrr2 rule will be
 913 // generated for FMA32, but not for rrr.
 914 // Therefore, we manually write the rrr2 rule in FPCONTRACT32.
     // Instantiations: each line supplies the printed mnemonic and the predicate
     // that enables that family. The order of these lines matters per the note
     // above; do not reorder them.
 915 defm FMAD32_ftz : FPCONTRACT32<"mad.ftz.f32", doFMADF32_ftz>;
 916 defm FMAD32 : FPCONTRACT32<"mad.f32", doFMADF32>;
 917 defm FMA32_ftz  : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
 918 defm FMA32  : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
 919 defm FMA64  : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
 920
 921 // b*c-a => fmad(b, c, -a)
     // FPCONTRACT32_SUB_PAT_MAD: one anonymous 32-bit pattern, enabled by Pred,
     // that rewrites (fsub (fmul $b, $c), $a) into Inst($b, $c, FNEGf32($a)).
     // It has no rule for the a-b*c shape.
     // NOTE(review): FNEGf32 is not defined in this part of the file; presumably
     // it is produced by "defm FNEG : F2<...>" - confirm.
 922 multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
 923   def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
 924           (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
 925           Requires<[Pred]>;
 926 }
 927
 928 // a-b*c => fmad(-b,c, a)
 929 // - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
 930 // b*c-a => fmad(b, c, -a)
 931 // - legal because b*c-a <=> b*c+(-a)
     // FPCONTRACT32_SUB_PAT: two anonymous 32-bit patterns, both enabled by Pred,
     // that fold a subtraction involving a product into Inst by negating one
     // input with FNEGf32.
 932 multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
       // a - b*c: negate the multiplicand $b.
 933   def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
 934           (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
 935           Requires<[Pred]>;
       // b*c - a: negate the addend $a.
 936   def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
 937           (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
 938           Requires<[Pred]>;
 939 }
 940
 941 // a-b*c => fmad(-b,c, a)
 942 // b*c-a => fmad(b, c, -a)
     // FPCONTRACT64_SUB_PAT: the 64-bit counterpart of FPCONTRACT32_SUB_PAT. Two
     // anonymous patterns, both enabled by Pred, that fold a subtraction
     // involving a product into Inst by negating one input with FNEGf64.
 943 multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
       // a - b*c: negate the multiplicand $b.
 944   def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
 945           (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
 946           Requires<[Pred]>;
 947
       // b*c - a: negate the addend $a.
 948   def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
 949           (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
 950           Requires<[Pred]>;
 951 }
 952
     // Subtraction-folding patterns targeting the all-register (rrr) records.
     // The FMA families get both shapes (a-b*c and b*c-a) and use the *AGG
     // predicates. The FMAD families get only the b*c-a shape and reuse the same
     // predicates as the FMAD32 instantiations. All predicates are defined
     // outside this part of the file.
 953 defm FMAF32ext_ftz  : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
 954 defm FMAF32ext  : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
 955 defm FMADF32ext_ftz : FPCONTRACT32_SUB_PAT_MAD<FMAD32_ftzrrr, doFMADF32_ftz>;
 956 defm FMADF32ext : FPCONTRACT32_SUB_PAT_MAD<FMAD32rrr, doFMADF32>;
 957 defm FMAF64ext  : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
 958
     // 32-bit sine and cosine. fsin and fcos on Float32Regs are printed as the
     // ".approx" forms; no predicate guards these records and no other variant
     // is defined here.
 959 def SINF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
 960                       "sin.approx.f32 \t$dst, $src;",
 961                       [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
 962 def COSF:  NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
 963                       "cos.approx.f32 \t$dst, $src;",
 964                       [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
 965
 966 //-----------------------------------
 967 // Logical Arithmetic
 968 //-----------------------------------
 969
     // LOG_FORMAT: two-operand bitwise operations for every integer width.
     //   OpcStr - mnemonic; a type suffix (".pred", ".b16", ".b32", ".b64") is
     //            appended per record.
     //   OpNode - SDNode to match.
     // Each width has an rr (register, register) and an ri (register,
     // immediate) record. The 1-bit records print ".pred"; the 8-bit records
     // print ".b16", the same suffix as the 16-bit records.
 970 multiclass LOG_FORMAT<string OpcStr, SDNode OpNode> {
 971   def b1rr:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
 972                       !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
 973                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
 974   def b1ri:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
 975                       !strconcat(OpcStr, ".pred  \t$dst, $a, $b;"),
 976                       [(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
 977   def b8rr:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
 978                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 979                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
 980   def b8ri:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
 981                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 982                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
 983   def b16rr:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
 984                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 985                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
 986                         Int16Regs:$b))]>;
 987   def b16ri:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
 988                       !strconcat(OpcStr, ".b16  \t$dst, $a, $b;"),
 989                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
 990   def b32rr:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
 991                       !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
 992                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
 993                         Int32Regs:$b))]>;
 994   def b32ri:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
 995                       !strconcat(OpcStr, ".b32  \t$dst, $a, $b;"),
 996                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
 997   def b64rr:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
 998                       !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
 999                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1000                         Int64Regs:$b))]>;
1001   def b64ri:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1002                       !strconcat(OpcStr, ".b64  \t$dst, $a, $b;"),
1003                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
1004 }
1005
     // Bitwise or / and / xor for all integer widths, instantiated from
     // LOG_FORMAT (records are named e.g. ORb32rr, ANDb1ri).
1006 defm OR  : LOG_FORMAT<"or", or>;
1007 defm AND : LOG_FORMAT<"and", and>;
1008 defm XOR : LOG_FORMAT<"xor", xor>;
1009
     // Bitwise complement, one record per register class, each with the pattern
     // (not $src). The 1-bit form prints "not.pred"; the 8-bit form prints
     // "not.b16", the same text as the 16-bit form.
1010 def NOT1:  NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
1011                       "not.pred \t$dst, $src;",
1012                       [(set Int1Regs:$dst, (not Int1Regs:$src))]>;
1013 def NOT8:  NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
1014                       "not.b16 \t$dst, $src;",
1015                       [(set Int8Regs:$dst, (not Int8Regs:$src))]>;
1016 def NOT16:  NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1017                       "not.b16 \t$dst, $src;",
1018                       [(set Int16Regs:$dst, (not Int16Regs:$src))]>;
1019 def NOT32:  NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
1020                       "not.b32 \t$dst, $src;",
1021                       [(set Int32Regs:$dst, (not Int32Regs:$src))]>;
1022 def NOT64:  NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
1023                       "not.b64 \t$dst, $src;",
1024                       [(set Int64Regs:$dst, (not Int64Regs:$src))]>;
1025
1026 // For shifts, the second src operand must be 32-bit value
     // LSHIFT_FORMAT: left-shift records for 64/32/16/8-bit values.
     //   OpcStr - mnemonic prefix; the record appends the width ("64", "32",
     //            "16") and the operand list.
     //   OpNode - SDNode to match.
     // The shift amount $b is always Int32Regs (rr) or i32imm (ri). The 32-bit
     // width also has an ii record where both the value and the amount are
     // immediates. The 8-bit records append "16", the same width suffix as the
     // 16-bit records.
1027 multiclass LSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
1028    def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1029                       Int32Regs:$b),
1030                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1031                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1032                         Int32Regs:$b))]>;
1033    def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1034                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1035                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1036                         (i32 imm:$b)))]>;
1037    def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1038                       Int32Regs:$b),
1039                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1040                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1041                         Int32Regs:$b))]>;
1042    def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1043                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1044                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1045                         (i32 imm:$b)))]>;
1046    def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1047                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1048                       [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1049                         (i32 imm:$b)))]>;
1050    def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1051                       Int32Regs:$b),
1052                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1053                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1054                         Int32Regs:$b))]>;
1055    def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1056                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1057                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1058                         (i32 imm:$b)))]>;
1059    def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1060                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1061                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1062                         Int32Regs:$b))]>;
1063    def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1064                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1065                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1066                         (i32 imm:$b)))]>;
1067 }
1068
     // Left shift: matches shl and prints "shl.b" followed by the width.
1069 defm SHL : LSHIFT_FORMAT<"shl.b", shl>;
1070
1071 // For shifts, the second src operand must be 32-bit value
1072 // Need to add cvt for the 8-bits.
     // RSHIFT_FORMAT: right-shift records for 64/32/16/8-bit values.
     //   OpcStr - mnemonic prefix; the record appends the width and operands.
     //   OpNode - SDNode to match.
     //   CVTStr - conversion mnemonic printed before the shift in the 8-bit
     //            records only.
     // The 64/32/16-bit records have the same shape as in LSHIFT_FORMAT. The
     // 8-bit records print two instructions: CVTStr from $a into $dst, then the
     // 16-bit shift of $dst by $b in place.
1073 multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode, string CVTStr> {
1074    def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,
1075                       Int32Regs:$b),
1076                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1077                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1078                         Int32Regs:$b))]>;
1079    def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
1080                       !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
1081                       [(set Int64Regs:$dst, (OpNode Int64Regs:$a,
1082                         (i32 imm:$b)))]>;
1083    def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,
1084                       Int32Regs:$b),
1085                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1086                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1087                         Int32Regs:$b))]>;
1088    def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1089                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1090                       [(set Int32Regs:$dst, (OpNode Int32Regs:$a,
1091                         (i32 imm:$b)))]>;
1092    def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
1093                       !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
1094                       [(set Int32Regs:$dst, (OpNode (i32 imm:$a),
1095                         (i32 imm:$b)))]>;
1096    def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,
1097                       Int32Regs:$b),
1098                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1099                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1100                         Int32Regs:$b))]>;
1101    def i16ri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
1102                       !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
1103                       [(set Int16Regs:$dst, (OpNode Int16Regs:$a,
1104                         (i32 imm:$b)))]>;
        // 8-bit, register amount: convert first, then shift $dst in place.
1105    def i8rr : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int32Regs:$b),
1106                       !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1107                       !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1108                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1109                         Int32Regs:$b))]>;
        // 8-bit, immediate amount: same two-instruction sequence.
1110    def i8ri : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, i32imm:$b),
1111                       !strconcat(CVTStr, !strconcat(" \t$dst, $a;\n\t",
1112                       !strconcat(OpcStr, "16 \t$dst, $dst, $b;"))),
1113                       [(set Int8Regs:$dst, (OpNode Int8Regs:$a,
1114                         (i32 imm:$b)))]>;
1115 }
1116
     // Right shifts. SRA matches sra, prints "shr.s" and uses "cvt.s16.s8" for
     // the 8-bit records; SRL matches srl, prints "shr.u" and uses "cvt.u16.u8".
1117 defm SRA : RSHIFT_FORMAT<"shr.s", sra, "cvt.s16.s8">;
1118 defm SRL : RSHIFT_FORMAT<"shr.u", srl, "cvt.u16.u8">;
1119
1120 // 32bit
     // ROT32imm_sw: 32-bit rotate by a constant, expanded into a multi-line asm
     // string. Inside a braced block it declares two .b32 registers (%lhs and
     // %rhs), shifts $src left by $amt1 into %lhs, shifts $src right by $amt2
     // into %rhs, and adds the two into $dst. The pattern list is empty, so this
     // record is only reachable through explicit Pat<> rules.
1121 def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
1122   (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
1123     !strconcat("{{\n\t",
1124     !strconcat(".reg .b32 %lhs;\n\t",
1125     !strconcat(".reg .b32 %rhs;\n\t",
1126     !strconcat("shl.b32 \t%lhs, $src, $amt1;\n\t",
1127     !strconcat("shr.b32 \t%rhs, $src, $amt2;\n\t",
1128     !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1129     !strconcat("}}", ""))))))),
1130     []>;
1131
     // SUB_FRM_32: immediate transform that yields an i32 target constant equal
     // to 32 minus the zero-extended value of the matched immediate.
1132 def SUB_FRM_32 : SDNodeXForm<imm, [{
1133     return CurDAG->getTargetConstant(32-N->getZExtValue(), MVT::i32);
1134 }]>;
1135
     // 32-bit rotate by an immediate, lowered to ROT32imm_sw.
     // rotl by amt: left-shift amount = amt, right-shift amount = 32 - amt.
1136 def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
1137           (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
     // rotr by amt: left-shift amount = 32 - amt, right-shift amount = amt.
1138 def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
1139           (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
1140
     // ROTL32reg_sw: 32-bit rotate-left by a register amount, matched directly
     // from (rotl $src, $amt). The asm block declares %lhs, %rhs and %amt2,
     // computes %lhs = $src << $amt, %amt2 = 32 - $amt, %rhs = $src >> %amt2,
     // and adds %lhs and %rhs into $dst.
1141 def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1142     Int32Regs:$amt),
1143     !strconcat("{{\n\t",
1144     !strconcat(".reg .b32 %lhs;\n\t",
1145     !strconcat(".reg .b32 %rhs;\n\t",
1146     !strconcat(".reg .b32 %amt2;\n\t",
1147     !strconcat("shl.b32 \t%lhs, $src, $amt;\n\t",
1148     !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1149     !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
1150     !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1151     !strconcat("}}", ""))))))))),
1152     [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
1153
     // ROTR32reg_sw: 32-bit rotate-right by a register amount, matched directly
     // from (rotr $src, $amt). Same layout as ROTL32reg_sw with the shift
     // directions swapped: %lhs = $src >> $amt, %amt2 = 32 - $amt,
     // %rhs = $src << %amt2, and $dst = %lhs + %rhs.
1154 def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
1155     Int32Regs:$amt),
1156     !strconcat("{{\n\t",
1157     !strconcat(".reg .b32 %lhs;\n\t",
1158     !strconcat(".reg .b32 %rhs;\n\t",
1159     !strconcat(".reg .b32 %amt2;\n\t",
1160     !strconcat("shr.b32 \t%lhs, $src, $amt;\n\t",
1161     !strconcat("sub.s32 \t%amt2, 32, $amt;\n\t",
1162     !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
1163     !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
1164     !strconcat("}}", ""))))))))),
1165     [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
1166
1167 // 64bit
     // ROT64imm_sw: 64-bit rotate by a constant, expanded into a multi-line asm
     // string. Inside a braced block it declares two .b64 registers (%lhs and
     // %rhs), shifts $src left by $amt1 into %lhs, shifts $src right by $amt2
     // into %rhs, and adds the two into $dst. Both amounts are i32 immediates.
     // The pattern list is empty, so this record is only reachable through
     // explicit Pat<> rules.
1168 def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1169     i32imm:$amt1, i32imm:$amt2),
1170     !strconcat("{{\n\t",
1171     !strconcat(".reg .b64 %lhs;\n\t",
1172     !strconcat(".reg .b64 %rhs;\n\t",
1173     !strconcat("shl.b64 \t%lhs, $src, $amt1;\n\t",
1174     !strconcat("shr.b64 \t%rhs, $src, $amt2;\n\t",
1175     !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1176     !strconcat("}}", ""))))))),
1177     []>;
1178
     // SUB_FRM_64: immediate transform that yields an i32 target constant equal
     // to 64 minus the zero-extended value of the matched immediate.
1179 def SUB_FRM_64 : SDNodeXForm<imm, [{
1180     return CurDAG->getTargetConstant(64-N->getZExtValue(), MVT::i32);
1181 }]>;
1182
     // 64-bit rotate by an immediate, lowered to ROT64imm_sw.
     // rotl by amt: left-shift amount = amt, right-shift amount = 64 - amt.
1183 def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
1184           (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>;
     // rotr by amt: left-shift amount = 64 - amt, right-shift amount = amt.
1185 def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
1186           (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
1187
     // ROTL64reg_sw: 64-bit rotate-left by a 32-bit register amount, matched
     // directly from (rotl $src, $amt). The asm block computes
     // %lhs = $src << $amt, %amt2 = 64 - $amt, %rhs = $src >> %amt2, and adds
     // %lhs and %rhs into $dst.
     // NOTE(review): %amt2 is declared ".u32" and computed with "sub.u32" here,
     // whereas the 32-bit rotates use ".b32" and "sub.s32" - confirm whether the
     // difference is intentional.
1188 def ROTL64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1189     Int32Regs:$amt),
1190     !strconcat("{{\n\t",
1191     !strconcat(".reg .b64 %lhs;\n\t",
1192     !strconcat(".reg .b64 %rhs;\n\t",
1193     !strconcat(".reg .u32 %amt2;\n\t",
1194     !strconcat("shl.b64 \t%lhs, $src, $amt;\n\t",
1195     !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1196     !strconcat("shr.b64 \t%rhs, $src, %amt2;\n\t",
1197     !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1198     !strconcat("}}", ""))))))))),
1199     [(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
1200
     // ROTR64reg_sw: 64-bit rotate-right by a 32-bit register amount, matched
     // directly from (rotr $src, $amt). Same layout as ROTL64reg_sw with the
     // shift directions swapped: %lhs = $src >> $amt, %amt2 = 64 - $amt,
     // %rhs = $src << %amt2, and $dst = %lhs + %rhs.
1201 def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
1202     Int32Regs:$amt),
1203     !strconcat("{{\n\t",
1204     !strconcat(".reg .b64 %lhs;\n\t",
1205     !strconcat(".reg .b64 %rhs;\n\t",
1206     !strconcat(".reg .u32 %amt2;\n\t",
1207     !strconcat("shr.b64 \t%lhs, $src, $amt;\n\t",
1208     !strconcat("sub.u32 \t%amt2, 64, $amt;\n\t",
1209     !strconcat("shl.b64 \t%rhs, $src, %amt2;\n\t",
1210     !strconcat("add.u64 \t$dst, %lhs, %rhs;\n\t",
1211     !strconcat("}}", ""))))))))),
1212     [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
1213
1214
1215 //-----------------------------------
1216 // Data Movement (Load / Store, Move)
1217 //-----------------------------------
1218
     // Address-matching complex patterns for i32 and i64 addresses. Each yields
     // two operands, is implemented by the named C++ selector (SelectADDRri /
     // SelectADDRri64, defined outside this file), lists frameindex as its root
     // node, and sets SDNPWantRoot.
1219 def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
1220   [SDNPWantRoot]>;
1221 def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
1222   [SDNPWantRoot]>;
1223
     // Memory operands made of two sub-operands, a register and an immediate,
     // printed through printMemOperand. MEMri uses Int32Regs + i32imm; MEMri64
     // uses Int64Regs + i64imm.
1224 def MEMri : Operand<i32> {
1225   let PrintMethod = "printMemOperand";
1226   let MIOperandInfo = (ops Int32Regs, i32imm);
1227 }
1228 def MEMri64 : Operand<i64> {
1229   let PrintMethod = "printMemOperand";
1230   let MIOperandInfo = (ops Int64Regs, i64imm);
1231 }
1232
     // imem: pointer-typed (iPTR) operand printed through printOperand.
1233 def imem : Operand<iPTR> {
1234     let PrintMethod = "printOperand";
1235 }
1236
     // imemAny: same as imem but typed iPTRAny.
1237 def imemAny : Operand<iPTRAny> {
1238     let PrintMethod = "printOperand";
1239 }
1240
     // LdStCode: i32 operand printed through printLdStCode.
     // NOTE(review): presumably carries load/store modifier codes - confirm
     // against the printLdStCode implementation.
1241 def LdStCode : Operand<i32> {
1242     let PrintMethod = "printLdStCode";
1243 }
1244
// NVPTXISD::Wrapper takes one pointer-typed operand and produces a result of
// the same type.
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Move a wrapped global address into a 32-bit or 64-bit integer register.
def MOV_ADDR
  : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
              "mov.u32 \t$dst, $a;",
              [(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;

def MOV_ADDR64
  : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
              "mov.u64 \t$dst, $a;",
              [(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
1255
// Register-to-register copies, all tagged IsSimpleMove and defined without
// selection patterns. copyPhysReg is hard-coded in NVPTXInstrInfo.cpp.
// Note that the Int8Regs copy is printed as a 16-bit move.
let IsSimpleMove = 1 in {
  def IMOV1rr
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
                "mov.pred \t$dst, $sss;", []>;
  def IMOV8rr
    : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$sss),
                "mov.u16 \t$dst, $sss;", []>;
  def IMOV16rr
    : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
                "mov.u16 \t$dst, $sss;", []>;
  def IMOV32rr
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
                "mov.u32 \t$dst, $sss;", []>;
  def IMOV64rr
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
                "mov.u64 \t$dst, $sss;", []>;

  def FMOV32rr
    : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
                "mov.f32 \t$dst, $src;", []>;
  def FMOV64rr
    : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
                "mov.f64 \t$dst, $src;", []>;
}
// Immediate materialization: one move per register class, selected for a
// bare integer (imm) or floating-point (fpimm) constant.
def IMOV1ri
  : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
              "mov.pred \t$dst, $src;",
              [(set Int1Regs:$dst, imm:$src)]>;
def IMOV8ri
  : NVPTXInst<(outs Int8Regs:$dst), (ins i8imm:$src),
              "mov.u16 \t$dst, $src;",
              [(set Int8Regs:$dst, imm:$src)]>;
def IMOV16ri
  : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
              "mov.u16 \t$dst, $src;",
              [(set Int16Regs:$dst, imm:$src)]>;
def IMOV32ri
  : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
              "mov.u32 \t$dst, $src;",
              [(set Int32Regs:$dst, imm:$src)]>;
def IMOV64i
  : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
              "mov.u64 \t$dst, $src;",
              [(set Int64Regs:$dst, imm:$src)]>;

def FMOV32ri
  : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
              "mov.f32 \t$dst, $src;",
              [(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri
  : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
              "mov.f64 \t$dst, $src;",
              [(set Float64Regs:$dst, fpimm:$src)]>;

// A wrapped external symbol of type i32 is materialized with IMOV32ri.
def : Pat<(i32 (Wrapper texternalsym:$dst)),
          (IMOV32ri texternalsym:$dst)>;
1298
//---- Copy Frame Index ----
// Turn a frame-index address (matched by ADDRri / ADDRri64) into a register
// value; the operand is printed with the "add" modifier.
def LEA_ADDRi
  : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
              "add.u32 \t$dst, ${addr:add};",
              [(set Int32Regs:$dst, ADDRri:$addr)]>;
def LEA_ADDRi64
  : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
              "add.u64 \t$dst, ${addr:add};",
              [(set Int64Regs:$dst, ADDRri64:$addr)]>;
1306
1307 //-----------------------------------
1308 // Comparison and Selection
1309 //-----------------------------------
1310
// Builds, in field `s`, the asm text of a compare-then-select block:
// {
//   .reg .pred p;
//   <OpcStr><sz1> p, <a>, <b>;
//   selp.s<sz2> <d>, -1, 0, p;
// }
// For example OpcStr=setp.gt.s sz1=16 sz2=16 d=%dst a=%a b=%b gives
//   setp.gt.s16 p, %a, %b;
//   selp.s16 %dst, -1, 0, p;
class Set_Str<string OpcStr, string sz1, string sz2, string d, string a,
  string b> {
  string s = !strconcat("{{\n\t.reg .pred p;\n\t",
                        OpcStr, sz1, " \tp, ", a, ", ", b,
                        ";\n\tselp.s", sz2, " \t", d,
                        ", -1, 0, p;\n\t}}");
}
1333
// Builds, in field `s`, the asm text of an 8-bit compare-then-select block.
// Both inputs are first converted into temporaries of type `type` using
// `cvt`, then compared at 16 bits:
// {
//   .reg .pred p;
//   .reg .s16 %temp1;
//   .reg .s16 %temp2;
//   cvt.s16.s8 %temp1, %a;
//   cvt.s16.s8 %temp2, %b;
//   setp.gt.s16 p, %temp1, %temp2;
//   selp.s16 %dst, -1, 0, p;
// }
// when OpcStr=setp.gt.s d=%dst a=%a b=%b type=s16 cvt=cvt.s16.s8
class Set_Stri8<string OpcStr, string d, string a, string b, string type,
  string cvt> {
  string s = !strconcat("{{\n\t.reg .pred p;\n\t",
                        ".reg .", type, " %temp1;\n\t",
                        ".reg .", type, " %temp2;\n\t",
                        cvt, " \t%temp1, ", a, ";\n\t",
                        cvt, " \t%temp2, ", b, ";\n\t",
                        OpcStr, "16", " \tp, %temp1, %temp2;\n\t",
                        "selp.s16 \t", d, ", -1, 0, p;\n\t}}");
}
1369
// Integer comparison instructions for one condition code.
//   OpcStr     - opcode prefix for predicate results, e.g. "setp.gt.s"
//   OpcStr_u32 - opcode prefix for 32-bit results, e.g. "set.gt.u32.s"
//   OpNode     - the setcc fragment being selected
//   TypeStr    - temporary type used by the 8-bit helpers, e.g. "s16"
//   CVTStr     - conversion used by the 8-bit helpers, e.g. "cvt.s16.s8"
// Three groups are produced: pattern-less "_toiN" forms whose result has the
// operand width, "_p" forms writing Int1Regs, and "_u32" forms writing
// Int32Regs. The 8-bit variants take their asm text from the Set_Stri8 and
// Handle_i8* helper classes.
multiclass ISET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode,
                       string TypeStr, string CVTStr> {
  // --- Result has the same width as the operands; no selection pattern ---
  def i8rr_toi8
    : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                Set_Stri8<OpcStr, "$dst", "$a", "$b", TypeStr, CVTStr>.s,
                []>;
  def i16rr_toi16
    : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                Set_Str<OpcStr, "16", "16", "$dst", "$a", "$b">.s,
                []>;
  def i32rr_toi32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                Set_Str<OpcStr, "32", "32", "$dst", "$a", "$b">.s,
                []>;
  def i64rr_toi64
    : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                Set_Str<OpcStr, "64", "64", "$dst", "$a", "$b">.s,
                []>;

  // --- Predicate (Int1Regs) result ---
  def i8rr_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                Handle_i8rr<OpcStr, TypeStr, CVTStr>.s,
                [(set Int1Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                Handle_i8ri<OpcStr, TypeStr, CVTStr>.s,
                [(set Int1Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
  def i8ir_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
                Handle_i8ir<OpcStr, TypeStr, CVTStr>.s,
                [(set Int1Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
  def i16rr_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
  def i16ir_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
                !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
  def i32rr_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i32ir_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
                !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
  def i64rr_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i64ir_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
                !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;

  // --- 32-bit integer (Int32Regs) result ---
  def i8rr_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, Int8Regs:$b),
                Handle_i8rr<OpcStr_u32, TypeStr, CVTStr>.s,
                [(set Int32Regs:$dst, (OpNode Int8Regs:$a, Int8Regs:$b))]>;
  def i8ri_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int8Regs:$a, i8imm:$b),
                Handle_i8ri<OpcStr_u32, TypeStr, CVTStr>.s,
                [(set Int32Regs:$dst, (OpNode Int8Regs:$a, imm:$b))]>;
  def i8ir_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins i8imm:$a, Int8Regs:$b),
                Handle_i8ir<OpcStr_u32, TypeStr, CVTStr>.s,
                [(set Int32Regs:$dst, (OpNode imm:$a, Int8Regs:$b))]>;
  def i16rr_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
                !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
  def i16ri_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
                !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
  def i16ir_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins i16imm:$a, Int16Regs:$b),
                !strconcat(OpcStr_u32, "16 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode imm:$a, Int16Regs:$b))]>;
  def i32rr_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
                !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
  def i32ri_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
                !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
  def i32ir_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, Int32Regs:$b),
                !strconcat(OpcStr_u32, "32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode imm:$a, Int32Regs:$b))]>;
  def i64rr_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
                !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
  def i64ri_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
                !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
  def i64ir_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins i64imm:$a, Int64Regs:$b),
                !strconcat(OpcStr_u32, "64 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode imm:$a, Int64Regs:$b))]>;
}
1465
// Floating-point comparison instructions for one condition code.
//   OpcStr     - opcode prefix for predicate results, e.g. "setp.gt."
//   OpcStr_u32 - opcode prefix for 32-bit results, e.g. "set.gt.u32."
//   OpNode     - the setcc fragment being selected
// Three groups are produced: pattern-less "_toiN" forms built with Set_Str,
// "_p" forms writing Int1Regs, and "_u32" forms writing Int32Regs.
//
// Every "_ftz" variant is guarded by doF32FTZ and is listed before its
// non-ftz twin, which matches the same pattern. The "_u32_ftz" variants
// carry that guard too; an unguarded ftz def would be selectable even when
// flush-to-zero is not requested and would shadow the non-ftz def after it.
multiclass FSET_FORMAT<string OpcStr, string OpcStr_u32, PatFrag OpNode> {
  // --- Integer result produced via a temporary predicate; no pattern ---
  def f32rr_toi32_ftz
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
                Set_Str<OpcStr, "ftz.f32", "32", "$dst", "$a", "$b">.s,
                []>, Requires<[doF32FTZ]>;
  def f32rr_toi32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
                Set_Str<OpcStr, "f32", "32", "$dst", "$a", "$b">.s,
                []>;
  def f64rr_toi64
    : NVPTXInst<(outs Int64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b),
                Set_Str<OpcStr, "f64", "64", "$dst", "$a", "$b">.s,
                []>;
  def f64rr_toi32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b),
                Set_Str<OpcStr, "f64", "32", "$dst", "$a", "$b">.s,
                []>;

  // --- Predicate (Int1Regs) result ---
  def f32rr_p_ftz
    : NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
                !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32rr_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
                !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri_p_ftz
    : NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
                !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ri_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
                !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
  def f32ir_p_ftz
    : NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
                !strconcat(OpcStr, "ftz.f32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ir_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
                !strconcat(OpcStr, "f32 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
  def f64rr_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b),
                !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
                !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
  def f64ir_p
    : NVPTXInst<(outs Int1Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
                !strconcat(OpcStr, "f64 \t$dst, $a, $b;"),
                [(set Int1Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;

  // --- 32-bit integer (Int32Regs) result ---
  def f32rr_u32_ftz
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
                !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32rr_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b),
                !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
  def f32ri_u32_ftz
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
                !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ri_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float32Regs:$a, f32imm:$b),
                !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
  def f32ir_u32_ftz
    : NVPTXInst<(outs Int32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
                !strconcat(OpcStr_u32, "ftz.f32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>,
      Requires<[doF32FTZ]>;
  def f32ir_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins f32imm:$a, Float32Regs:$b),
                !strconcat(OpcStr_u32, "f32 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float32Regs:$b))]>;
  def f64rr_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b),
                !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
  def f64ri_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins Float64Regs:$a, f64imm:$b),
                !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
  def f64ir_u32
    : NVPTXInst<(outs Int32Regs:$dst), (ins f64imm:$a, Float64Regs:$b),
                !strconcat(OpcStr_u32, "f64 \t$dst, $a, $b;"),
                [(set Int32Regs:$dst, (OpNode fpimm:$a, Float64Regs:$b))]>;
}
1557
// Integer comparisons, one ISET_FORMAT instantiation per condition code.
// The signed variants use the ".s" opcodes with s16 temporaries for 8-bit
// operands; the unsigned variants use ".u" opcodes with u16 temporaries.
defm ISetSGT : ISET_FORMAT<"setp.gt.s", "set.gt.u32.s", setgt,
                           "s16", "cvt.s16.s8">;
defm ISetUGT : ISET_FORMAT<"setp.gt.u", "set.gt.u32.u", setugt,
                           "u16", "cvt.u16.u8">;
defm ISetSLT : ISET_FORMAT<"setp.lt.s", "set.lt.u32.s", setlt,
                           "s16", "cvt.s16.s8">;
defm ISetULT : ISET_FORMAT<"setp.lt.u", "set.lt.u32.u", setult,
                           "u16", "cvt.u16.u8">;
defm ISetSGE : ISET_FORMAT<"setp.ge.s", "set.ge.u32.s", setge,
                           "s16", "cvt.s16.s8">;
defm ISetUGE : ISET_FORMAT<"setp.ge.u", "set.ge.u32.u", setuge,
                           "u16", "cvt.u16.u8">;
defm ISetSLE : ISET_FORMAT<"setp.le.s", "set.le.u32.s", setle,
                           "s16", "cvt.s16.s8">;
defm ISetULE : ISET_FORMAT<"setp.le.u", "set.le.u32.u", setule,
                           "u16", "cvt.u16.u8">;
defm ISetSEQ : ISET_FORMAT<"setp.eq.s", "set.eq.u32.s", seteq,
                           "s16", "cvt.s16.s8">;
defm ISetUEQ : ISET_FORMAT<"setp.eq.u", "set.eq.u32.u", setueq,
                           "u16", "cvt.u16.u8">;
defm ISetSNE : ISET_FORMAT<"setp.ne.s", "set.ne.u32.s", setne,
                           "s16", "cvt.s16.s8">;
defm ISetUNE : ISET_FORMAT<"setp.ne.u", "set.ne.u32.u", setune,
                           "u16", "cvt.u16.u8">;
1582
// Comparisons of two predicates that yield a predicate.
// Inequality is a plain xor; equality is xor followed by not, using a scoped
// temporary predicate.
def ISetSNEi1rr_p
  : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
def ISetUNEi1rr_p
  : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              "xor.pred \t$dst, $a, $b;",
              [(set Int1Regs:$dst, (setune Int1Regs:$a, Int1Regs:$b))]>;
def ISetSEQi1rr_p
  : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              !strconcat("{{\n\t",
                         ".reg .pred temp;\n\t",
                         "xor.pred \ttemp, $a, $b;\n\t",
                         "not.pred \t$dst, temp;\n\t}}"),
              [(set Int1Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
def ISetUEQi1rr_p
  : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              !strconcat("{{\n\t",
                         ".reg .pred temp;\n\t",
                         "xor.pred \ttemp, $a, $b;\n\t",
                         "not.pred \t$dst, temp;\n\t}}"),
              [(set Int1Regs:$dst, (setueq Int1Regs:$a, Int1Regs:$b))]>;
1605
// Compare two predicates and produce a 32-bit value: the xor of the inputs
// goes into a scoped temporary predicate, and selp then picks -1 or 0.
// The "ne" form yields -1 when the inputs differ; the "eq" form swaps the
// two selp constants.
def ISETSNEi1rr_u32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              !strconcat("{{\n\t",
                         ".reg .pred temp;\n\t",
                         "xor.pred \ttemp, $a, $b;\n\t",
                         "selp.u32 \t$dst, -1, 0, temp;",
                         "\n\t}}"),
              [(set Int32Regs:$dst, (setne Int1Regs:$a, Int1Regs:$b))]>;
def ISETSEQi1rr_u32
  : NVPTXInst<(outs Int32Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
              !strconcat("{{\n\t",
                         ".reg .pred temp;\n\t",
                         "xor.pred \ttemp, $a, $b;\n\t",
                         "selp.u32 \t$dst, 0, -1, temp;",
                         "\n\t}}"),
              [(set Int32Regs:$dst, (seteq Int1Regs:$a, Int1Regs:$b))]>;
1621
// Floating-point comparisons selected from the seto* fragments.
defm FSetGT  : FSET_FORMAT<"setp.gt.",  "set.gt.u32.",  setogt>;
defm FSetLT  : FSET_FORMAT<"setp.lt.",  "set.lt.u32.",  setolt>;
defm FSetGE  : FSET_FORMAT<"setp.ge.",  "set.ge.u32.",  setoge>;
defm FSetLE  : FSET_FORMAT<"setp.le.",  "set.le.u32.",  setole>;
defm FSetEQ  : FSET_FORMAT<"setp.eq.",  "set.eq.u32.",  setoeq>;
defm FSetNE  : FSET_FORMAT<"setp.ne.",  "set.ne.u32.",  setone>;

// Floating-point comparisons selected from the setu* fragments; these use
// the "u"-suffixed PTX comparison operators.
defm FSetUGT : FSET_FORMAT<"setp.gtu.", "set.gtu.u32.", setugt>;
defm FSetULT : FSET_FORMAT<"setp.ltu.", "set.ltu.u32.", setult>;
defm FSetUGE : FSET_FORMAT<"setp.geu.", "set.geu.u32.", setuge>;
defm FSetULE : FSET_FORMAT<"setp.leu.", "set.leu.u32.", setule>;
defm FSetUEQ : FSET_FORMAT<"setp.equ.", "set.equ.u32.", setueq>;
defm FSetUNE : FSET_FORMAT<"setp.neu.", "set.neu.u32.", setune>;

// seto / setuo map onto the PTX "num" / "nan" comparison operators.
defm FSetNUM : FSET_FORMAT<"setp.num.", "set.num.u32.", seto>;
defm FSetNAN : FSET_FORMAT<"setp.nan.", "set.nan.u32.", setuo>;
1638
// Select between two predicates, expanded as (p & a) | (!p & b).
def SELECTi1rr
  : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
        (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
                (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;

// Select on Int8Regs values, printed as selp.b16. The suffix names the kinds
// of $a and $b (r = register, i = immediate); $p is the predicate.
def SELECTi8rr
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins Int8Regs:$a, Int8Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, Int8Regs:$a, Int8Regs:$b))]>;
def SELECTi8ri
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins Int8Regs:$a, i8imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, Int8Regs:$a, imm:$b))]>;
def SELECTi8ir
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins i8imm:$a, Int8Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int8Regs:$b))]>;
def SELECTi8ii
  : NVPTXInst<(outs Int8Regs:$dst),
              (ins i8imm:$a, i8imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int8Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;
1658
// Select on Int16Regs values (selp.b16). The suffix names the kinds of $a
// and $b (r = register, i = immediate); $p is the predicate.
def SELECTi16rr
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, Int16Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, Int16Regs:$a, Int16Regs:$b))]>;
def SELECTi16ri
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins Int16Regs:$a, i16imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, Int16Regs:$a, imm:$b))]>;
def SELECTi16ir
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins i16imm:$a, Int16Regs:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int16Regs:$b))]>;
def SELECTi16ii
  : NVPTXInst<(outs Int16Regs:$dst),
              (ins i16imm:$a, i16imm:$b, Int1Regs:$p),
              "selp.b16 \t$dst, $a, $b, $p;",
              [(set Int16Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;
1675
// Select on Int32Regs values (selp.b32). The suffix names the kinds of $a
// and $b (r = register, i = immediate); $p is the predicate.
def SELECTi32rr
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, Int32Regs:$a, Int32Regs:$b))]>;
def SELECTi32ri
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins Int32Regs:$a, i32imm:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, Int32Regs:$a, imm:$b))]>;
def SELECTi32ir
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins i32imm:$a, Int32Regs:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int32Regs:$b))]>;
def SELECTi32ii
  : NVPTXInst<(outs Int32Regs:$dst),
              (ins i32imm:$a, i32imm:$b, Int1Regs:$p),
              "selp.b32 \t$dst, $a, $b, $p;",
              [(set Int32Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;
1692
// Select on Int64Regs values (selp.b64). The suffix names the kinds of $a
// and $b (r = register, i = immediate); $p is the predicate.
def SELECTi64rr
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, Int64Regs:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, Int64Regs:$a, Int64Regs:$b))]>;
def SELECTi64ri
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins Int64Regs:$a, i64imm:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, Int64Regs:$a, imm:$b))]>;
def SELECTi64ir
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins i64imm:$a, Int64Regs:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, imm:$a, Int64Regs:$b))]>;
def SELECTi64ii
  : NVPTXInst<(outs Int64Regs:$dst),
              (ins i64imm:$a, i64imm:$b, Int1Regs:$p),
              "selp.b64 \t$dst, $a, $b, $p;",
              [(set Int64Regs:$dst,
                    (select Int1Regs:$p, imm:$a, imm:$b))]>;
1709
// Select on Float32Regs values (selp.f32). The suffix names the kinds of $a
// and $b (r = register, i = fp immediate); $p is the predicate.
def SELECTf32rr
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, Float32Regs:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, Float32Regs:$a, Float32Regs:$b))]>;
def SELECTf32ri
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins Float32Regs:$a, f32imm:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, Float32Regs:$a, fpimm:$b))]>;
def SELECTf32ir
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, Float32Regs:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, Float32Regs:$b))]>;
def SELECTf32ii
  : NVPTXInst<(outs Float32Regs:$dst),
              (ins f32imm:$a, f32imm:$b, Int1Regs:$p),
              "selp.f32 \t$dst, $a, $b, $p;",
              [(set Float32Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1727
// Select on Float64Regs values (selp.f64). The suffix names the kinds of $a
// and $b (r = register, i = fp immediate); $p is the predicate.
def SELECTf64rr
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, Float64Regs:$b, Int1Regs:$p),
              "selp.f64 \t$dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, Float64Regs:$a, Float64Regs:$b))]>;
def SELECTf64ri
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins Float64Regs:$a, f64imm:$b, Int1Regs:$p),
              "selp.f64 \t$dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, Float64Regs:$a, fpimm:$b))]>;
def SELECTf64ir
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins f64imm:$a, Float64Regs:$b, Int1Regs:$p),
              "selp.f64 \t$dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, Float64Regs:$b))]>;
def SELECTf64ii
  : NVPTXInst<(outs Float64Regs:$dst),
              (ins f64imm:$a, f64imm:$b, Int1Regs:$p),
              "selp.f64 \t $dst, $a, $b, $p;",
              [(set Float64Regs:$dst,
                    (select Int1Regs:$p, fpimm:$a, fpimm:$b))]>;
1745
1746 //def ld_param         : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
1747 //                        [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
1748
// Type profiles for the NVPTXISD parameter / call / return-value nodes.
// Each profile is <number of results, number of operands, constraints>.
def SDTDeclareParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTDeclareScalarParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
def SDTLoadParamProfile
  : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDTPrintCallProfile
  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTPrintCallUniProfile
  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDTStoreParamProfile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTStoreParam32Profile
  : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>]>;
def SDTCallArgProfile
  : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTCallArgMarkProfile
  : SDTypeProfile<0, 0, []>;
def SDTCallVoidProfile
  : SDTypeProfile<0, 1, []>;
def SDTCallValProfile
  : SDTypeProfile<1, 0, []>;
def SDTMoveParamProfile
  : SDTypeProfile<1, 1, []>;
def SDTMoveRetvalProfile
  : SDTypeProfile<0, 1, []>;
def SDTStoreRetvalProfile
  : SDTypeProfile<0, 2, [SDTCisInt<0>]>;
def SDTPseudoUseParamProfile
  : SDTypeProfile<0, 1, []>;
1766
// Target-specific DAG nodes for parameter declaration, argument passing,
// calls and return values. Most are chained, glued in and out, and marked as
// having side effects; the exceptions are visible in the property lists
// (LoadParam may load, MoveParam has no properties, and the retval / return
// nodes are chained with side effects but not glued).
def DeclareParam
  : SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam
  : SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam
  : SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet
  : SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LoadParam
  : SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
           [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def PrintCall
  : SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni
  : SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParam
  : SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32
  : SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32
  : SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveToParam
  : SDNode<"NVPTXISD::MoveToParam", SDTStoreParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgBegin
  : SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg
  : SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg
  : SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd
  : SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid
  : SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype
  : SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal
  : SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def MoveParam
  : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def MoveRetval
  : SDNode<"NVPTXISD::MoveRetval", SDTMoveRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def StoreRetval
  : SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def MoveToRetval
  : SDNode<"NVPTXISD::MoveToRetval", SDTStoreRetvalProfile,
           [SDNPHasChain, SDNPSideEffect]>;
def PseudoUseParam
  : SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
           [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def RETURNNode
  : SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
           [SDNPHasChain, SDNPSideEffect]>;
1818
// Matches (LoadParam 1, $b) and prints
//   ld.param<opstr>  $dst, [retval0+$b];
// where opstr is the type suffix (e.g. ".b32") and $b an immediate offset.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
              [(set regclass:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
1824
// Matches (LoadParam 0, $b) and prints
//   mov<opstr>  $dst, retval$b;
// Here $b is appended directly to the "retval" name rather than used as an
// offset.
class LoadParamRegInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("mov", opstr, "\t$dst, retval$b;"),
              [(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
1830
// Matches (StoreParam $a, $b, $val) and prints
//   st.param<opstr>  [param$a+$b], $val;
// $a is appended to the "param" name, $b is the immediate offset.
class StoreParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("st.param", opstr, "\t[param$a+$b], $val;"),
              [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
1836
// Matches (MoveToParam $a, $b, $val) and prints
//   mov<opstr>  param$a, $val;
// The $b operand is part of the matched pattern but is not printed.
class MoveToParamInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
              !strconcat("mov", opstr, "\tparam$a, $val;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), regclass:$val)]>;
1842
// Matches (StoreRetval $a, $val) and prints
//   st.param<opstr>  [func_retval0+$a], $val;
// with $a as the immediate offset.
class StoreRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
              !strconcat("st.param", opstr, "\t[func_retval0+$a], $val;"),
              [(StoreRetval (i32 imm:$a), regclass:$val)]>;
1848
// Matches (MoveToRetval $num, $val) and prints
//   mov<opstr>  func_retval$num, $val;
// $num is appended directly to the "func_retval" name.
class MoveToRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins i32imm:$num, regclass:$val),
              !strconcat("mov", opstr, "\tfunc_retval$num, $val;"),
              [(MoveToRetval (i32 imm:$num), regclass:$val)]>;
1854
// Matches (MoveRetval $val) and prints
//   mov<opstr>  func_retval0, $val;
class MoveRetvalInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs), (ins regclass:$val),
              !strconcat("mov", opstr, "\tfunc_retval0, $val;"),
              [(MoveRetval regclass:$val)]>;
1860
// Opening text of a "call" statement.  One instruction exists per constant
// operand of PrintCall (0 through 8); the printed text lists that many
// retvalN names, or none for the 0 case.  The trailing ", " / " " is part of
// the emitted text.
def PrintCallRetInst1
  : NVPTXInst<(outs), (ins),
              "call (retval0), ",
              [(PrintCall (i32 1))]>;
def PrintCallRetInst2
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1), ",
              [(PrintCall (i32 2))]>;
def PrintCallRetInst3
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2), ",
              [(PrintCall (i32 3))]>;
def PrintCallRetInst4
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3), ",
              [(PrintCall (i32 4))]>;
def PrintCallRetInst5
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCall (i32 5))]>;
def PrintCallRetInst6
  : NVPTXInst<(outs), (ins),
              "call (retval0, retval1, retval2, retval3, retval4, retval5), ",
              [(PrintCall (i32 6))]>;
def PrintCallRetInst7
  : NVPTXInst<(outs), (ins),
              !strconcat("call (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6), "),
              [(PrintCall (i32 7))]>;
def PrintCallRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6, retval7), "),
              [(PrintCall (i32 8))]>;

// No return values: just the bare mnemonic.
def PrintCallNoRetInst
  : NVPTXInst<(outs), (ins), "call ", [(PrintCall (i32 0))]>;
1889
// Same family as the PrintCall instructions, but matching PrintCallUni and
// printing the "call.uni" mnemonic.  One instruction per constant operand
// (0 through 8).
def PrintCallUniRetInst1
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0), ",
              [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1), ",
              [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2), ",
              [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3), ",
              [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5
  : NVPTXInst<(outs), (ins),
              "call.uni (retval0, retval1, retval2, retval3, retval4), ",
              [(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5), "),
              [(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6), "),
              [(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8
  : NVPTXInst<(outs), (ins),
              !strconcat("call.uni (retval0, retval1, retval2, retval3, ",
                         "retval4, retval5, retval6, retval7), "),
              [(PrintCallUni (i32 8))]>;

// No return values: just the bare mnemonic.
def PrintCallUniNoRetInst
  : NVPTXInst<(outs), (ins), "call.uni ", [(PrintCallUni (i32 0))]>;
1918
// LoadParam from the in-memory return slot (LoadParam selector 1), integer
// types.
def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
def LoadParamMemI16 : LoadParamMemInst<Int16Regs, ".b16">;
def LoadParamMemI8  : LoadParamMemInst<Int8Regs, ".b8">;

// Disabled alternative for the 16- and 8-bit cases: load 32 bits into a
// temporary and narrow with cvt.
//def LoadParamMemI16    : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
//                "cvt.u16.u32\t$dst, temp_param_reg;"),
//                [(set Int16Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;
//def LoadParamMemI8     : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
//                !strconcat("ld.param.b32\ttemp_param_reg, [retval0+$b];\n\t",
//                "cvt.u16.u32\t$dst, temp_param_reg;"),
//                [(set Int8Regs:$dst, (LoadParam (i32 1), (i32 imm:$b)))]>;

// Floating-point variants of the in-memory form.
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;

// LoadParam from a retvalN name (LoadParam selector 0).  The 64- and 32-bit
// cases are plain moves; the 16- and 8-bit cases print cvt.u16.u32 instead.
def LoadParamRegI64 : LoadParamRegInst<Int64Regs, ".b64">;
def LoadParamRegI32 : LoadParamRegInst<Int32Regs, ".b32">;
def LoadParamRegI16
  : NVPTXInst<(outs Int16Regs:$dst), (ins i32imm:$b),
              "cvt.u16.u32\t$dst, retval$b;",
              [(set Int16Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
def LoadParamRegI8
  : NVPTXInst<(outs Int8Regs:$dst), (ins i32imm:$b),
              "cvt.u16.u32\t$dst, retval$b;",
              [(set Int8Regs:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;

def LoadParamRegF32 : LoadParamRegInst<Float32Regs, ".f32">;
def LoadParamRegF64 : LoadParamRegInst<Float64Regs, ".f64">;
1949
// Plain StoreParam: st.param.<type> [param$a+$b], $val for each register
// class.
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
def StoreParamI32 : StoreParamInst<Int32Regs, ".b32">;
def StoreParamI16 : StoreParamInst<Int16Regs, ".b16">;
def StoreParamI8  : StoreParamInst<Int8Regs, ".b8">;

// StoreParamS32 / StoreParamU32: the narrow value is first converted into
// temp_param_reg (signed or unsigned cvt) and then stored as .b32.  Both
// statements are emitted from a single instruction, separated by "\n\t".
def StoreParamS32I16
  : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.s32.s16\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def StoreParamU32I16
  : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;

def StoreParamU32I8
  : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u8\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamU32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
def StoreParamS32I8
  : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.s32.s8\ttemp_param_reg, $val;\n\t",
                         "st.param.b32\t[param$a+$b], temp_param_reg;"),
              [(StoreParamS32 (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;

def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
1988
// MoveToParam for 64/32-bit integer and floating-point values: a single mov.
def MoveToParamI64 : MoveToParamInst<Int64Regs, ".b64">;
def MoveToParamI32 : MoveToParamInst<Int32Regs, ".b32">;
def MoveToParamF64 : MoveToParamInst<Float64Regs, ".f64">;
def MoveToParamF32 : MoveToParamInst<Float32Regs, ".f32">;

// 16- and 8-bit values are widened into temp_param_reg with cvt.u32.u16 and
// then moved as .b32.  Both register classes print the same asm text.
def MoveToParamI16
  : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                         "mov.b32\tparam$a, temp_param_reg;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int16Regs:$val)]>;
def MoveToParamI8
  : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a, i32imm:$b),
              !strconcat("cvt.u32.u16\ttemp_param_reg, $val;\n\t",
                         "mov.b32\tparam$a, temp_param_reg;"),
              [(MoveToParam (i32 imm:$a), (i32 imm:$b), Int8Regs:$val)]>;
2003
// StoreRetval: st.param.<type> [func_retval0+$a], $val, integer types.
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
def StoreRetvalI16 : StoreRetvalInst<Int16Regs, ".b16">;
def StoreRetvalI8  : StoreRetvalInst<Int8Regs, ".b8">;

// Disabled alternative for the 16- and 8-bit cases: widen into a scoped
// 32-bit temporary and store as .b32.
//def StoreRetvalI16    : NVPTXInst<(outs), (ins Int16Regs:$val, i32imm:$a),
//     !strconcat("\{\n\t",
//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
//     [(StoreRetval (i32 imm:$a), Int16Regs:$val)]>;
//def StoreRetvalI8     : NVPTXInst<(outs), (ins Int8Regs:$val, i32imm:$a),
//     !strconcat("\{\n\t",
//     !strconcat(".reg .b32 temp_retval_reg;\n\t",
//     !strconcat("cvt.u32.u16\ttemp_retval_reg, $val;\n\t",
//                "st.param.b32\t[func_retval0+$a], temp_retval_reg;\n\t\}"))),
//     [(StoreRetval (i32 imm:$a), Int8Regs:$val)]>;

// Floating-point variants.
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
2024
// MoveRetval: mov.<type> func_retval0, $val for every register class.
def MoveRetvalI64 : MoveRetvalInst<Int64Regs, ".b64">;
def MoveRetvalI32 : MoveRetvalInst<Int32Regs, ".b32">;
def MoveRetvalI16 : MoveRetvalInst<Int16Regs, ".b16">;
def MoveRetvalI8  : MoveRetvalInst<Int8Regs, ".b8">;
def MoveRetvalF64 : MoveRetvalInst<Float64Regs, ".f64">;
def MoveRetvalF32 : MoveRetvalInst<Float32Regs, ".f32">;

// MoveToRetval: mov.<type> func_retval$num, $val for 64/32-bit types.
def MoveToRetvalI64 : MoveToRetvalInst<Int64Regs, ".b64">;
def MoveToRetvalI32 : MoveToRetvalInst<Int32Regs, ".b32">;
def MoveToRetvalF64 : MoveToRetvalInst<Float64Regs, ".f64">;
def MoveToRetvalF32 : MoveToRetvalInst<Float32Regs, ".f32">;

// 16- and 8-bit sources are written with cvt.u32.u16 instead of mov.
def MoveToRetvalI16
  : NVPTXInst<(outs), (ins i32imm:$num, Int16Regs:$val),
              "cvt.u32.u16\tfunc_retval$num, $val;",
              [(MoveToRetval (i32 imm:$num), Int16Regs:$val)]>;
def MoveToRetvalI8
  : NVPTXInst<(outs), (ins i32imm:$num, Int8Regs:$val),
              "cvt.u32.u16\tfunc_retval$num, $val;",
              [(MoveToRetval (i32 imm:$num), Int8Regs:$val)]>;
2042
// Punctuation around a call's argument list, plus the return instruction.
// CallArgEnd with operand 1 prints ");", with operand 0 prints ")" only.
def CallArgBeginInst
  : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1
  : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
def CallArgEndInst0
  : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst
  : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
2047
// A non-final register call argument: matches (CallArg 0, $a) and prints the
// register followed by ", ".
class CallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a, ",
              [(CallArg (i32 0), regclass:$a)]>;
2051
// The final register call argument: matches (LastCallArg 0, $a) and prints
// the register with no trailing separator.
class LastCallArgInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$a),
              "$a",
              [(LastCallArg (i32 0), regclass:$a)]>;
2055
// Register call arguments (not last in the list).
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;
def CallArgI8  : CallArgInst<Int8Regs>;

def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;

// Register call arguments (last in the list).
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
def LastCallArgI8  : LastCallArgInst<Int8Regs>;

def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;

// Immediate i32 call arguments (selector 0): the immediate itself is printed.
def CallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a),
              "$a, ",
              [(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm
  : NVPTXInst<(outs), (ins i32imm:$a),
              "$a",
              [(LastCallArg (i32 0), (i32 imm:$a))]>;

// Selector 1: the immediate is appended to the "param" name.
def CallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a),
              "param$a, ",
              [(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam
  : NVPTXInst<(outs), (ins i32imm:$a),
              "param$a",
              [(LastCallArg (i32 1), (i32 imm:$a))]>;
2081
// Callee operand of a call.  Three forms: a wrapped global address, a 32-bit
// register, or a 64-bit register.  Each prints the operand followed by ", ".
def CallVoidInst
  : NVPTXInst<(outs), (ins imem:$addr),
              "$addr, ",
              [(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg
  : NVPTXInst<(outs), (ins Int32Regs:$addr),
              "$addr, ",
              [(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64
  : NVPTXInst<(outs), (ins Int64Regs:$addr),
              "$addr, ",
              [(CallVoid Int64Regs:$addr)]>;

// Prints ", prototype_<val>;" for the Prototype node.
def PrototypeInst
  : NVPTXInst<(outs), (ins i32imm:$val),
              ", prototype_$val;",
              [(Prototype (i32 imm:$val))]>;
2094
// Declarations of return-value names.
//   DeclareRetParam          -> aligned .b8 array  retval$num[$size]
//   DeclareRet, selector 1   -> scalar .param      retval$num
//   DeclareRet, selector 2   -> scalar .reg        retval$num
def DeclareRetMemInst
  : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
              ".param .align $align .b8 retval$num[$size];",
              [(DeclareRetParam (i32 imm:$align), (i32 imm:$size),
                                (i32 imm:$num))]>;
def DeclareRetScalarInst
  : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
              ".param .b$size retval$num;",
              [(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
def DeclareRetRegInst
  : NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
              ".reg .b$size retval$num;",
              [(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;

// Declarations of outgoing parameter names.
//   DeclareParam                       -> aligned .b8 array  param$a[$size]
//   DeclareScalarParam, last operand 0 -> scalar .param      param$a
//   DeclareScalarParam, last operand 1 -> scalar .reg        param$a
def DeclareParamInst
  : NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
              ".param .align $align .b8 param$a[$size];",
              [(DeclareParam (i32 imm:$align), (i32 imm:$a),
                             (i32 imm:$size))]>;
def DeclareScalarParamInst
  : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
              ".param .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
def DeclareScalarRegInst
  : NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
              ".reg .b$size param$a;",
              [(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
2116
// Matches (MoveParam $src) and prints
//   mov<asmstr>  $dst, $src;
// Source and destination share one register class.
class MoveParamInst<NVPTXRegClass regclass, string asmstr>
  : NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
              !strconcat("mov", asmstr, "\t$dst, $src;"),
              [(set regclass:$dst, (MoveParam regclass:$src))]>;
2121
// MoveParam per register class.  The 16- and 8-bit cases print cvt.u16.u32
// rather than a mov.
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
def MoveParamI16
  : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
              "cvt.u16.u32\t$dst, $src;",
              [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamI8
  : NVPTXInst<(outs Int8Regs:$dst), (ins Int8Regs:$src),
              "cvt.u16.u32\t$dst, $src;",
              [(set Int8Regs:$dst, (MoveParam Int8Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
2132
// Matches (PseudoUseParam $src).  The printed text starts with "//", so the
// emitted line is only a comment naming the register.
class PseudoUseParamInst<NVPTXRegClass regclass>
  : NVPTXInst<(outs), (ins regclass:$src),
              "// Pseudo use of $src",
              [(PseudoUseParam regclass:$src)]>;
2137
// One pseudo-use instruction per register class.
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
def PseudoUseParamI8  : PseudoUseParamInst<Int8Regs>;
def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
2144
2145
//
// Load / Store Handling
//
// LD defines the scalar load forms for one destination register class.
// Every form takes the same leading modifier operands ($isVol, $addsp, $Vec,
// $Sign, $fromWidth) and differs only in how the address is given:
//   _avar     imem operand
//   _areg     Int32Regs register
//   _areg_64  Int64Regs register
//   _ari      Int32Regs register + immediate offset
//   _ari_64   Int64Regs register + immediate offset
//   _asi      imem operand + immediate offset
// The pattern lists are empty, so none of these is selected by a TableGen
// pattern in this file.
multiclass LD<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t$dst, [$addr];"),
    []>;
  def _areg : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t$dst, [$addr];"),
    []>;
  def _areg_64 : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t$dst, [$addr];"),
    []>;
  def _ari : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t$dst, [$addr+$offset];"),
    []>;
  def _ari_64 : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t$dst, [$addr+$offset];"),
    []>;
  def _asi : NVPTXInst<
    (outs regclass:$dst),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t$dst, [$addr+$offset];"),
    []>;
}
2181
// Instantiate the scalar loads for every register class.  With empty pattern
// lists the flags cannot be inferred, so they are set explicitly here.
let mayLoad = 1, neverHasSideEffects = 1 in {
  defm LD_i8  : LD<Int8Regs>;
  defm LD_i16 : LD<Int16Regs>;
  defm LD_i32 : LD<Int32Regs>;
  defm LD_i64 : LD<Int64Regs>;
  defm LD_f32 : LD<Float32Regs>;
  defm LD_f64 : LD<Float64Regs>;
}
2190
// ST defines the scalar store forms for one source register class.  The
// value to store ($src) comes first, then the modifier operands ($isVol,
// $addsp, $Vec, $Sign, $toWidth), then the address in one of six forms:
//   _avar     imem operand
//   _areg     Int32Regs register
//   _areg_64  Int64Regs register
//   _ari      Int32Regs register + immediate offset
//   _ari_64   Int64Regs register + immediate offset
//   _asi      imem operand + immediate offset
// The pattern lists are empty.
multiclass ST<NVPTXRegClass regclass> {
  def _avar : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$toWidth",
               " \t[$addr], $src;"),
    []>;
  def _areg : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$toWidth",
               " \t[$addr], $src;"),
    []>;
  def _areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$toWidth",
               " \t[$addr], $src;"),
    []>;
  def _ari : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$toWidth",
               " \t[$addr+$offset], $src;"),
    []>;
  def _ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$toWidth",
               " \t[$addr+$offset], $src;"),
    []>;
  def _asi : NVPTXInst<
    (outs),
    (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
         LdStCode:$Sign, i32imm:$toWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$toWidth",
               " \t[$addr+$offset], $src;"),
    []>;
}
2223
// Instantiate the scalar stores for every register class, with the store
// flags set explicitly because the pattern lists are empty.
let mayStore = 1, neverHasSideEffects = 1 in {
  defm ST_i8  : ST<Int8Regs>;
  defm ST_i16 : ST<Int16Regs>;
  defm ST_i32 : ST<Int32Regs>;
  defm ST_i64 : ST<Int64Regs>;
  defm ST_f32 : ST<Float32Regs>;
  defm ST_f64 : ST<Float64Regs>;
}
2232
// The following is used only in and after vector elementization.  Vector
// elementization happens at the machine-instruction level, so these
// instructions never appear in the DAG.
//
// LD_VEC defines two-element (_v2_*) and four-element (_v4_*) loads.  Each
// width comes in the same six addressing forms as the scalar loads:
// _avar, _areg, _areg_64, _ari, _ari_64, _asi.  The destinations are printed
// as a list written with doubled braces in the asm string.
multiclass LD_VEC<NVPTXRegClass regclass> {
  // ---- two destination registers ----
  def _v2_avar : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2}}, [$addr];"),
    []>;
  def _v2_areg : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2}}, [$addr];"),
    []>;
  def _v2_areg_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2}}, [$addr];"),
    []>;
  def _v2_ari : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2}}, [$addr+$offset];"),
    []>;
  def _v2_ari_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2}}, [$addr+$offset];"),
    []>;
  def _v2_asi : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2}}, [$addr+$offset];"),
    []>;

  // ---- four destination registers ----
  def _v4_avar : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
    []>;
  def _v4_areg : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
    []>;
  def _v4_areg_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"),
    []>;
  def _v4_ari : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
  def _v4_ari_64 : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
  def _v4_asi : NVPTXInst<
    (outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
    (ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("ld${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"),
    []>;
}
// Instantiate the vector loads for every register class, with the load
// flags set explicitly because the pattern lists are empty.
let mayLoad = 1, neverHasSideEffects = 1 in {
  defm LDV_i8  : LD_VEC<Int8Regs>;
  defm LDV_i16 : LD_VEC<Int16Regs>;
  defm LDV_i32 : LD_VEC<Int32Regs>;
  defm LDV_i64 : LD_VEC<Int64Regs>;
  defm LDV_f32 : LD_VEC<Float32Regs>;
  defm LDV_f64 : LD_VEC<Float64Regs>;
}
2316
// ST_VEC defines two-element (_v2_*) and four-element (_v4_*) stores.  The
// source registers come first, then the modifier operands ($isVol, $addsp,
// $Vec, $Sign, $fromWidth), then the address in one of the six forms
// _avar, _areg, _areg_64, _ari, _ari_64, _asi.  The pattern lists are empty.
multiclass ST_VEC<NVPTXRegClass regclass> {
  // ---- two source registers ----
  def _v2_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr], {{$src1, $src2}};"),
    []>;
  def _v2_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr], {{$src1, $src2}};"),
    []>;
  def _v2_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr], {{$src1, $src2}};"),
    []>;
  def _v2_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr+$offset], {{$src1, $src2}};"),
    []>;
  def _v2_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr+$offset], {{$src1, $src2}};"),
    []>;
  def _v2_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr+$offset], {{$src1, $src2}};"),
    []>;

  // ---- four source registers ----
  def _v4_avar : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_areg : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_areg_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_ari : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_ari_64 : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
  def _v4_asi : NVPTXInst<
    (outs),
    (ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
         LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
         i32imm:$fromWidth, imem:$addr, i32imm:$offset),
    !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}",
               ".${Sign:sign}$fromWidth",
               " \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"),
    []>;
}
2391 let mayStore=1, neverHasSideEffects=1 in {
     // One ST_VEC expansion per register class. The enclosing `let` sets
     // mayStore=1 and neverHasSideEffects=1 on every record generated here.
2392 defm STV_i8  : ST_VEC<Int8Regs>;
2393 defm STV_i16 : ST_VEC<Int16Regs>;
2394 defm STV_i32 : ST_VEC<Int32Regs>;
2395 defm STV_i64 : ST_VEC<Int64Regs>;
2396 defm STV_f32 : ST_VEC<Float32Regs>;
2397 defm STV_f64 : ST_VEC<Float64Regs>;
2398 }
2399
2400
2401 //---- Conversion ----
2402
2403 multiclass CVT_INT_TO_FP <string OpStr, SDNode OpNode> {
     // Integer -> f32/f64 conversion records.
     //   OpStr  - text spliced into each cvt mnemonic immediately before the
     //            source bit width ("cvt.rn.f32." + OpStr + "8", ...).
     //   OpNode - the DAG node every selection pattern in here matches.
     // The Int1Regs sources do not use cvt; they emit a selp that chooses
     // between the literals 1.0 and 0.0.
     // NOTE(review): an i1 "true" read as a signed value is -1, yet the selp
     // forms produce 1.0 regardless of OpNode -- confirm this is intended when
     // the multiclass is instantiated for a signed conversion.
2404 // FIXME: need to add f16 support
2405 //  def CVTf16i8 :
2406 //    NVPTXInst<(outs Float16Regs:$d), (ins Int8Regs:$a),
2407 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "8 \t$d, $a;"),
2408 //        [(set Float16Regs:$d, (OpNode Int8Regs:$a))]>;
2409 //  def CVTf16i16 :
2410 //    NVPTXInst<(outs Float16Regs:$d), (ins Int16Regs:$a),
2411 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "16 \t$d, $a;"),
2412 //        [(set Float16Regs:$d, (OpNode Int16Regs:$a))]>;
2413 //  def CVTf16i32 :
2414 //    NVPTXInst<(outs Float16Regs:$d), (ins Int32Regs:$a),
2415 //              !strconcat(!strconcat("cvt.rn.f16.", OpStr), "32 \t$d, $a;"),
2416 //        [(set Float16Regs:$d, (OpNode Int32Regs:$a))]>;
2417 //  def CVTf16i64:
2418 //    NVPTXInst<(outs Float16Regs:$d), (ins Int64Regs:$a),
2419 //          !strconcat(!strconcat("cvt.rn.f16.", OpStr), "64 \t$d, $a;"),
2420 //            [(set Float16Regs:$d, (OpNode Int64Regs:$a))]>;
2421
       // ---- f32 results ----
2422   def CVTf32i1 :
2423     NVPTXInst<(outs Float32Regs:$d), (ins Int1Regs:$a),
2424               "selp.f32 \t$d, 1.0, 0.0, $a;",
2425         [(set Float32Regs:$d, (OpNode Int1Regs:$a))]>;
2426   def CVTf32i8 :
2427     NVPTXInst<(outs Float32Regs:$d), (ins Int8Regs:$a),
2428               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "8 \t$d, $a;"),
2429         [(set Float32Regs:$d, (OpNode Int8Regs:$a))]>;
2430   def CVTf32i16 :
2431     NVPTXInst<(outs Float32Regs:$d), (ins Int16Regs:$a),
2432               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "16 \t$d, $a;"),
2433         [(set Float32Regs:$d, (OpNode Int16Regs:$a))]>;
2434   def CVTf32i32 :
2435     NVPTXInst<(outs Float32Regs:$d), (ins Int32Regs:$a),
2436               !strconcat(!strconcat("cvt.rn.f32.", OpStr), "32 \t$d, $a;"),
2437         [(set Float32Regs:$d, (OpNode Int32Regs:$a))]>;
2438   def CVTf32i64:
2439     NVPTXInst<(outs Float32Regs:$d), (ins Int64Regs:$a),
2440           !strconcat(!strconcat("cvt.rn.f32.", OpStr), "64 \t$d, $a;"),
2441             [(set Float32Regs:$d, (OpNode Int64Regs:$a))]>;
2442
       // ---- f64 results ----
2443   def CVTf64i1 :
2444     NVPTXInst<(outs Float64Regs:$d), (ins Int1Regs:$a),
2445               "selp.f64 \t$d, 1.0, 0.0, $a;",
2446         [(set Float64Regs:$d, (OpNode Int1Regs:$a))]>;
2447   def CVTf64i8 :
2448     NVPTXInst<(outs Float64Regs:$d), (ins Int8Regs:$a),
2449               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "8 \t$d, $a;"),
2450         [(set Float64Regs:$d, (OpNode Int8Regs:$a))]>;
2451   def CVTf64i16 :
2452     NVPTXInst<(outs Float64Regs:$d), (ins Int16Regs:$a),
2453               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "16 \t$d, $a;"),
2454         [(set Float64Regs:$d, (OpNode Int16Regs:$a))]>;
2455   def CVTf64i32 :
2456     NVPTXInst<(outs Float64Regs:$d), (ins Int32Regs:$a),
2457               !strconcat(!strconcat("cvt.rn.f64.", OpStr), "32 \t$d, $a;"),
2458         [(set Float64Regs:$d, (OpNode Int32Regs:$a))]>;
2459   def CVTf64i64:
2460     NVPTXInst<(outs Float64Regs:$d), (ins Int64Regs:$a),
2461           !strconcat(!strconcat("cvt.rn.f64.", OpStr), "64 \t$d, $a;"),
2462             [(set Float64Regs:$d, (OpNode Int64Regs:$a))]>;
2463 }
2464
     // Signed and unsigned instantiations of CVT_INT_TO_FP: "s"/"u" become the
     // source-type letter in the cvt mnemonics, paired with the matching node.
2465 defm Sint_to_fp : CVT_INT_TO_FP <"s", sint_to_fp>;
2466 defm Uint_to_fp : CVT_INT_TO_FP <"u", uint_to_fp>;
2467
2468 multiclass CVT_FP_TO_INT <string OpStr, SDNode OpNode> {
     // f32/f64 -> integer conversion records; every mnemonic uses the .rzi
     // modifier.
     //   OpStr  - text spliced in right before the destination bit width
     //            ("cvt.rzi." + OpStr + "32.f32", ...).
     //   OpNode - the DAG node every selection pattern in here matches.
     // Each f32 source comes in two records with the same pattern: an _ftz
     // form that adds ".ftz" and is gated by Requires<[doF32FTZ]>, and a plain
     // form with no predicate. f64 sources have only the plain form.
     // The Int8Regs destinations are emitted with a 16-bit destination type
     // ("16.f32" / "16.f64"), i.e. the same strings as the Int16Regs records.
2469 // FIXME: need to add f16 support
2470 //  def CVTi8f16:
2471 //    NVPTXInst<(outs Int8Regs:$d), (ins Float16Regs:$a),
2472 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "8.f16 $d, $a;"),
2473 //        [(set Int8Regs:$d, (OpNode Float16Regs:$a))]>;
2474   def CVTi8f32_ftz:
2475     NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2476               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2477         [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2478   def CVTi8f32:
2479     NVPTXInst<(outs Int8Regs:$d), (ins Float32Regs:$a),
2480               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2481         [(set Int8Regs:$d, (OpNode Float32Regs:$a))]>;
2482   def CVTi8f64:
2483     NVPTXInst<(outs Int8Regs:$d), (ins Float64Regs:$a),
2484               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2485         [(set Int8Regs:$d, (OpNode Float64Regs:$a))]>;
2486
2487 // FIXME: need to add f16 support
2488 //  def CVTi16f16:
2489 //    NVPTXInst<(outs Int16Regs:$d), (ins Float16Regs:$a),
2490 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f16 \t$d, $a;"),
2491 //        [(set Int16Regs:$d, (OpNode Float16Regs:$a))]>;
2492   def CVTi16f32_ftz:
2493     NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2494               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "16.f32 \t$d, $a;"),
2495         [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2496   def CVTi16f32:
2497     NVPTXInst<(outs Int16Regs:$d), (ins Float32Regs:$a),
2498               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f32 \t$d, $a;"),
2499         [(set Int16Regs:$d, (OpNode Float32Regs:$a))]>;
2500   def CVTi16f64:
2501     NVPTXInst<(outs Int16Regs:$d), (ins Float64Regs:$a),
2502               !strconcat(!strconcat("cvt.rzi.", OpStr), "16.f64 \t$d, $a;"),
2503         [(set Int16Regs:$d, (OpNode Float64Regs:$a))]>;
2504
2505 // FIXME: need to add f16 support
2506 //  def CVTi32f16:
2507 //    NVPTXInst<(outs Int32Regs:$d), (ins Float16Regs:$a),
2508 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f16 \t$d, $a;"),
2509 //        [(set Int32Regs:$d, (OpNode Float16Regs:$a))]>;
2510   def CVTi32f32_ftz:
2511     NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2512               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "32.f32 \t$d, $a;"),
2513         [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2514   def CVTi32f32:
2515     NVPTXInst<(outs Int32Regs:$d), (ins Float32Regs:$a),
2516               !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f32 \t$d, $a;"),
2517         [(set Int32Regs:$d, (OpNode Float32Regs:$a))]>;
2518   def CVTi32f64:
2519     NVPTXInst<(outs Int32Regs:$d), (ins Float64Regs:$a),
2520               !strconcat(!strconcat("cvt.rzi.", OpStr), "32.f64 \t$d, $a;"),
2521         [(set Int32Regs:$d, (OpNode Float64Regs:$a))]>;
2522
2523 // FIXME: need to add f16 support
2524 //  def CVTi64f16:
2525 //    NVPTXInst<(outs Int64Regs:$d), (ins Float16Regs:$a),
2526 //              !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f16 \t$d, $a;"),
2527 //        [(set Int64Regs:$d, (OpNode Float16Regs:$a))]>;
2528   def CVTi64f32_ftz:
2529     NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2530               !strconcat(!strconcat("cvt.rzi.ftz.", OpStr), "64.f32 \t$d, $a;"),
2531         [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2532   def CVTi64f32:
2533     NVPTXInst<(outs Int64Regs:$d), (ins Float32Regs:$a),
2534               !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f32 \t$d, $a;"),
2535         [(set Int64Regs:$d, (OpNode Float32Regs:$a))]>;
2536   def CVTi64f64:
2537     NVPTXInst<(outs Int64Regs:$d), (ins Float64Regs:$a),
2538               !strconcat(!strconcat("cvt.rzi.", OpStr), "64.f64 \t$d, $a;"),
2539         [(set Int64Regs:$d, (OpNode Float64Regs:$a))]>;
2540 }
2541
     // Signed and unsigned instantiations of CVT_FP_TO_INT: "s"/"u" become the
     // destination-type letter in the cvt mnemonics.
2542 defm Fp_to_sint : CVT_FP_TO_INT <"s", fp_to_sint>;
2543 defm Fp_to_uint : CVT_FP_TO_INT <"u", fp_to_uint>;
2544
2545 multiclass INT_EXTEND_UNSIGNED_1 <SDNode OpNode> {
     // Widens an Int1Regs value to 8/16/32/64 bits for the extension node
     // OpNode. Each record is a selp that picks between the immediates 1 and 0
     // using $a as the selector. The Int8Regs result is produced with
     // selp.u16, the same mnemonic as the Int16Regs result.
2546   def ext1to8:
2547        NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2548            "selp.u16 \t$d, 1, 0, $a;",
2549      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2550   def ext1to16:
2551        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2552            "selp.u16 \t$d, 1, 0, $a;",
2553      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2554   def ext1to32:
2555        NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2556            "selp.u32 \t$d, 1, 0, $a;",
2557      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2558   def ext1to64:
2559        NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2560            "selp.u64 \t$d, 1, 0, $a;",
2561      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2562 }
2563
2564 multiclass INT_EXTEND_SIGNED_1 <SDNode OpNode> {
     // Signed counterpart of INT_EXTEND_UNSIGNED_1: widens an Int1Regs value
     // to 8/16/32/64 bits with a selp that picks between the immediates -1 and
     // 0 using $a as the selector. The Int8Regs result is produced with
     // selp.s16, the same mnemonic as the Int16Regs result.
2565   def ext1to8:
2566        NVPTXInst<(outs Int8Regs:$d), (ins Int1Regs:$a),
2567            "selp.s16 \t$d, -1, 0, $a;",
2568      [(set Int8Regs:$d, (OpNode Int1Regs:$a))]>;
2569   def ext1to16:
2570        NVPTXInst<(outs Int16Regs:$d), (ins Int1Regs:$a),
2571            "selp.s16 \t$d, -1, 0, $a;",
2572      [(set Int16Regs:$d, (OpNode Int1Regs:$a))]>;
2573   def ext1to32:
2574        NVPTXInst<(outs Int32Regs:$d), (ins Int1Regs:$a),
2575            "selp.s32 \t$d, -1, 0, $a;",
2576      [(set Int32Regs:$d, (OpNode Int1Regs:$a))]>;
2577   def ext1to64:
2578        NVPTXInst<(outs Int64Regs:$d), (ins Int1Regs:$a),
2579            "selp.s64 \t$d, -1, 0, $a;",
2580      [(set Int64Regs:$d, (OpNode Int1Regs:$a))]>;
2581 }
2582
2583 multiclass INT_EXTEND <string OpStr, SDNode OpNode> {
       // Widening conversions between the 8/16/32/64-bit integer register
       // classes. Every record prints "cvt.<OpStr><dst>.<OpStr><src>", so OpStr
       // supplies the type letter for both the destination and the source, and
       // OpNode is the extension node the pattern matches.
2584   // All Int8Regs are emitted as 16bit registers in ptx.
2585   // And there is no selp.u8 in ptx.
2586   def ext8to16:
2587        NVPTXInst<(outs Int16Regs:$d), (ins Int8Regs:$a),
2588            !strconcat("cvt.", !strconcat(OpStr, !strconcat("16.",
2589              !strconcat(OpStr, "8 \t$d, $a;")))),
2590      [(set Int16Regs:$d, (OpNode Int8Regs:$a))]>;
2591   def ext8to32:
2592        NVPTXInst<(outs Int32Regs:$d), (ins Int8Regs:$a),
2593            !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2594              !strconcat(OpStr, "8 \t$d, $a;")))),
2595      [(set Int32Regs:$d, (OpNode Int8Regs:$a))]>;
2596   def ext8to64:
2597        NVPTXInst<(outs Int64Regs:$d), (ins Int8Regs:$a),
2598            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2599              !strconcat(OpStr, "8 \t$d, $a;")))),
2600      [(set Int64Regs:$d, (OpNode Int8Regs:$a))]>;
2601   def ext16to32:
2602        NVPTXInst<(outs Int32Regs:$d), (ins Int16Regs:$a),
2603            !strconcat("cvt.", !strconcat(OpStr, !strconcat("32.",
2604              !strconcat(OpStr, "16 \t$d, $a;")))),
2605      [(set Int32Regs:$d, (OpNode Int16Regs:$a))]>;
2606   def ext16to64:
2607        NVPTXInst<(outs Int64Regs:$d), (ins Int16Regs:$a),
2608            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2609              !strconcat(OpStr, "16 \t$d, $a;")))),
2610      [(set Int64Regs:$d, (OpNode Int16Regs:$a))]>;
2611   def ext32to64:
2612        NVPTXInst<(outs Int64Regs:$d), (ins Int32Regs:$a),
2613            !strconcat("cvt.", !strconcat(OpStr, !strconcat("64.",
2614              !strconcat(OpStr, "32 \t$d, $a;")))),
2615      [(set Int64Regs:$d, (OpNode Int32Regs:$a))]>;
2616 }
2617
     // Extensions from i1. sext uses the -1/0 selp forms; zext and anyext both
     // use the 1/0 selp forms.
2618 defm Sint_extend_1 : INT_EXTEND_SIGNED_1<sext>;
2619 defm Zint_extend_1 : INT_EXTEND_UNSIGNED_1<zext>;
2620 defm Aint_extend_1 : INT_EXTEND_UNSIGNED_1<anyext>;
2621
     // Extensions between the wider integer classes. anyext is instantiated
     // with the same "u" type letter as zext.
2622 defm Sint_extend : INT_EXTEND <"s", sext>;
2623 defm Zint_extend : INT_EXTEND <"u", zext>;
2624 defm Aint_extend : INT_EXTEND <"u", anyext>;
2625
2626 class TRUNC_to1_asm<string sz> {
       // Builds, in field `s`, the assembly text used to truncate an integer to
       // i1. `sz` is a type suffix including its leading dot (callers pass
       // ".b16", ".b32", ".b64"). The generated text is a brace-delimited
       // sequence that:
       //   1. declares a scratch register "temp" of type sz,
       //   2. emits "and<sz>" of $a with 1 into temp,
       //   3. emits "setp<sz>.eq" comparing temp with 1 into $d.
       // NOTE(review): the compare is spelled "setp" + sz + ".eq" (for example
       // "setp.b32.eq"); the PTX ISA documents the order setp.CmpOp.type --
       // confirm the assembler accepts this spelling.
2627   string s = !strconcat("{{\n\t",
2628              !strconcat(".reg ",
2629              !strconcat(sz,
2630              !strconcat(" temp;\n\t",
2631              !strconcat("and",
2632              !strconcat(sz,
2633              !strconcat("\t temp, $a, 1;\n\t",
2634              !strconcat("setp",
2635              !strconcat(sz, ".eq \t $d, temp, 1;\n\t}}")))))))));
2636 }
2637
     // Integer truncation records, all matching the `trunc` node. The first six
     // narrow between the 8/16/32/64-bit register classes with a single
     // cvt.u<dst>.u<src>.
2638 def TRUNC_64to32 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
2639              "cvt.u32.u64 \t$d, $a;",
2640        [(set Int32Regs:$d, (trunc Int64Regs:$a))]>;
2641 def TRUNC_64to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int64Regs:$a),
2642              "cvt.u16.u64 \t$d, $a;",
2643        [(set Int16Regs:$d, (trunc Int64Regs:$a))]>;
2644 def TRUNC_64to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int64Regs:$a),
2645              "cvt.u8.u64 \t$d, $a;",
2646        [(set Int8Regs:$d, (trunc Int64Regs:$a))]>;
2647 def TRUNC_32to16 : NVPTXInst<(outs Int16Regs:$d), (ins Int32Regs:$a),
2648              "cvt.u16.u32 \t$d, $a;",
2649        [(set Int16Regs:$d, (trunc Int32Regs:$a))]>;
2650 def TRUNC_32to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int32Regs:$a),
2651              "cvt.u8.u32 \t$d, $a;",
2652        [(set Int8Regs:$d, (trunc Int32Regs:$a))]>;
2653 def TRUNC_16to8 : NVPTXInst<(outs Int8Regs:$d), (ins Int16Regs:$a),
2654              "cvt.u8.u16 \t$d, $a;",
2655        [(set Int8Regs:$d, (trunc Int16Regs:$a))]>;
     // Truncation to i1 uses the multi-instruction text built by TRUNC_to1_asm
     // (mask with 1, then compare against 1). The Int8Regs source passes
     // ".b16", the same suffix as the Int16Regs source.
2656 def TRUNC_64to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
2657              TRUNC_to1_asm<".b64">.s,
2658              [(set Int1Regs:$d, (trunc Int64Regs:$a))]>;
2659 def TRUNC_32to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
2660              TRUNC_to1_asm<".b32">.s,
2661              [(set Int1Regs:$d, (trunc Int32Regs:$a))]>;
2662 def TRUNC_16to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int16Regs:$a),
2663              TRUNC_to1_asm<".b16">.s,
2664              [(set Int1Regs:$d, (trunc Int16Regs:$a))]>;
2665 def TRUNC_8to1 : NVPTXInst<(outs Int1Regs:$d), (ins Int8Regs:$a),
2666              TRUNC_to1_asm<".b16">.s,
2667              [(set Int1Regs:$d, (trunc Int8Regs:$a))]>;
2668
2669 // Select instructions
     // These patterns cover `select` whose condition lives in Int32Regs. The
     // condition is first reduced to an i1 with TRUNC_32to1 and then passed as
     // the last operand of the SELECT*rr record for the value type.
2670 def : Pat<(select Int32Regs:$pred, Int8Regs:$a, Int8Regs:$b),
2671           (SELECTi8rr Int8Regs:$a, Int8Regs:$b, (TRUNC_32to1 Int32Regs:$pred))>;
2672 def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
2673           (SELECTi16rr Int16Regs:$a, Int16Regs:$b,
2674             (TRUNC_32to1 Int32Regs:$pred))>;
2675 def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
2676           (SELECTi32rr Int32Regs:$a, Int32Regs:$b,
2677             (TRUNC_32to1 Int32Regs:$pred))>;
2678 def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
2679           (SELECTi64rr Int64Regs:$a, Int64Regs:$b,
2680             (TRUNC_32to1 Int32Regs:$pred))>;
2681 def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
2682           (SELECTf32rr Float32Regs:$a, Float32Regs:$b,
2683             (TRUNC_32to1 Int32Regs:$pred))>;
2684 def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
2685           (SELECTf64rr Float64Regs:$a, Float64Regs:$b,
2686             (TRUNC_32to1 Int32Regs:$pred))>;
2687
     // Bit-pattern-preserving move between two register classes, matching the
     // `bitconvert` node.
     //   SzStr       - bit width appended to "mov.b" (e.g. "32" -> mov.b32).
     //   regclassIn  - register class of the source operand $a.
     //   regclassOut - register class of the result $d.
2688 class F_BITCONVERT<string SzStr, NVPTXRegClass regclassIn,
2689   NVPTXRegClass regclassOut> :
2690            NVPTXInst<(outs regclassOut:$d), (ins regclassIn:$a),
2691            !strconcat("mov.b", !strconcat(SzStr, " \t $d, $a;")),
2692      [(set regclassOut:$d, (bitconvert regclassIn:$a))]>;
2693
     // Int <-> float bitconverts for the 32-bit and 64-bit register classes,
     // one record per direction.
2694 def BITCONVERT_32_I2F : F_BITCONVERT<"32", Int32Regs, Float32Regs>;
2695 def BITCONVERT_32_F2I : F_BITCONVERT<"32", Float32Regs, Int32Regs>;
2696 def BITCONVERT_64_I2F : F_BITCONVERT<"64", Int64Regs, Float64Regs>;
2697 def BITCONVERT_64_F2I : F_BITCONVERT<"64", Float64Regs, Int64Regs>;
2698
2699 // pack a set of smaller int registers to a larger int register
     // None of these records carries a selection pattern (the pattern list is
     // empty). The two Int8Regs forms (V4I8toI32, V2I8toI16) first copy every
     // source into a scratch .b8 register with cvt.u8.u8 and then pack the
     // scratch registers with one vector mov; the remaining forms pack their
     // sources directly with a single mov.b32 / mov.b64.
     // NOTE(review): the vector operand in the two Int8Regs forms is written
     // with single braces ("{%t0, ...}") while the other forms double them
     // ("{{$s1, ...}}") -- confirm both spellings print as intended.
2700 def V4I8toI32 : NVPTXInst<(outs Int32Regs:$d),
2701                           (ins Int8Regs:$s1, Int8Regs:$s2,
2702                                Int8Regs:$s3, Int8Regs:$s4),
2703                           !strconcat("{{\n\t.reg .b8\t%t<4>;",
2704                           !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2705                           !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2706                           !strconcat("\n\tcvt.u8.u8\t%t2, $s3;",
2707                           !strconcat("\n\tcvt.u8.u8\t%t3, $s4;",
2708                            "\n\tmov.b32\t$d, {%t0, %t1, %t2, %t3};\n\t}}"))))),
2709                           []>;
2710 def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
2711                           (ins Int16Regs:$s1, Int16Regs:$s2,
2712                                Int16Regs:$s3, Int16Regs:$s4),
2713                           "mov.b64\t$d, {{$s1, $s2, $s3, $s4}};",
2714                           []>;
2715 def V2I8toI16 : NVPTXInst<(outs Int16Regs:$d),
2716                           (ins Int8Regs:$s1, Int8Regs:$s2),
2717                           !strconcat("{{\n\t.reg .b8\t%t<2>;",
2718                           !strconcat("\n\tcvt.u8.u8\t%t0, $s1;",
2719                           !strconcat("\n\tcvt.u8.u8\t%t1, $s2;",
2720                                      "\n\tmov.b16\t$d, {%t0, %t1};\n\t}}"))),
2721                           []>;
2722 def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
2723                           (ins Int16Regs:$s1, Int16Regs:$s2),
2724                           "mov.b32\t$d, {{$s1, $s2}};",
2725                           []>;
2726 def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
2727                           (ins Int32Regs:$s1, Int32Regs:$s2),
2728                           "mov.b64\t$d, {{$s1, $s2}};",
2729                           []>;
2730 def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
2731                           (ins Float32Regs:$s1, Float32Regs:$s2),
2732                           "mov.b64\t$d, {{$s1, $s2}};",
2733                           []>;
2734
2735 // unpack a larger int register to a set of smaller int registers
     // None of these records carries a selection pattern (the pattern list is
     // empty). The two Int8Regs forms (I32toV4I8, I16toV2I8) split the source
     // into scratch .b8 registers with one vector mov and then copy each
     // scratch register to its destination with cvt.u8.u8; the remaining forms
     // split directly into the destinations with a single mov.b32 / mov.b64.
     // NOTE(review): the vector operand in the two Int8Regs forms is written
     // with single braces ("{%t0, ...}") while the other forms double them
     // ("{{$d1, ...}}") -- confirm both spellings print as intended.
2736 def I32toV4I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2,
2737                                 Int8Regs:$d3, Int8Regs:$d4),
2738                           (ins Int32Regs:$s),
2739                           !strconcat("{{\n\t.reg .b8\t%t<4>;",
2740                           !strconcat("\n\tmov.b32\t{%t0, %t1, %t2, %t3}, $s;",
2741                           !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2742                           !strconcat("\n\tcvt.u8.u8\t$d2, %t1;",
2743                           !strconcat("\n\tcvt.u8.u8\t$d3, %t2;",
2744                                      "\n\tcvt.u8.u8\t$d4, %t3;\n\t}}"))))),
2745                           []>;
2746 def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
2747                                  Int16Regs:$d3, Int16Regs:$d4),
2748                            (ins Int64Regs:$s),
2749                            "mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;",
2750                           []>;
2751 def I16toV2I8 : NVPTXInst<(outs Int8Regs:$d1, Int8Regs:$d2),
2752                           (ins Int16Regs:$s),
2753                           !strconcat("{{\n\t.reg .b8\t%t<2>;",
2754                           !strconcat("\n\tmov.b16\t{%t0, %t1}, $s;",
2755                           !strconcat("\n\tcvt.u8.u8\t$d1, %t0;",
2756                                      "\n\tcvt.u8.u8\t$d2, %t1;\n\t}}"))),
2757                           []>;
2758 def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
2759                            (ins Int32Regs:$s),
2760                            "mov.b32\t{{$d1, $d2}}, $s;",
2761                           []>;
2762 def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
2763                            (ins Int64Regs:$s),
2764                            "mov.b64\t{{$d1, $d2}}, $s;",
2765                           []>;
2766 def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
2767                            (ins Float64Regs:$s),
2768                            "mov.b64\t{{$d1, $d2}}, $s;",
2769                           []>;
2770
     // f64 -> f32 rounding (`fround`), emitted as cvt.rn. Two records share the
     // same pattern: FPRound_ftz adds ".ftz" and is gated by
     // Requires<[doF32FTZ]>; FPRound has no predicate.
2771 def FPRound_ftz : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2772             "cvt.rn.ftz.f32.f64 \t$d, $a;",
2773       [(set Float32Regs:$d, (fround Float64Regs:$a))]>, Requires<[doF32FTZ]>;
2774
2775 def FPRound : NVPTXInst<(outs Float32Regs:$d), (ins Float64Regs:$a),
2776             "cvt.rn.f32.f64 \t$d, $a;",
2777       [(set Float32Regs:$d, (fround Float64Regs:$a))]>;
2778
     // f32 -> f64 extension (`fextend`), emitted as cvt with no rounding
     // modifier. FPExtend_ftz adds ".ftz" and is gated by Requires<[doF32FTZ]>;
     // FPExtend has no predicate.
2779 def FPExtend_ftz : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2780             "cvt.ftz.f64.f32 \t$d, $a;",
2781       [(set Float64Regs:$d, (fextend Float32Regs:$a))]>, Requires<[doF32FTZ]>;
2782
2783 def FPExtend : NVPTXInst<(outs Float64Regs:$d), (ins Float32Regs:$a),
2784             "cvt.f64.f32 \t$d, $a;",
2785       [(set Float64Regs:$d, (fextend Float32Regs:$a))]>;
2786
     // DAG node for NVPTXISD::RET_FLAG: no typed operands or results
     // (SDTNone), chained, with an optional incoming glue. Matched by the
     // Return instruction.
2787 def retflag       : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
2788                            [SDNPHasChain, SDNPOptInGlue]>;
2789
2790 //-----------------------------------
2791 // Control-flow
2792 //-----------------------------------
2793
2794 let isTerminator=1 in {
        // Every record in this block is flagged as a terminator.

        // Return: prints "ret;" and matches the retflag node.
2795    let isReturn=1, isBarrier=1 in
2796       def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
2797
        // CBranch: branch to $target guarded by predicate $a ("@$a bra");
        // matches brcond on an Int1Regs condition.
2798    let isBranch=1 in
2799       def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2800                           "@$a bra \t$target;",
2801                            [(brcond Int1Regs:$a, bb:$target)]>;
        // CBranchOther: same operands, but guarded by the negated predicate
        // ("@!$a bra"). It has no pattern of its own.
2802    let isBranch=1 in
2803       def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
2804                           "@!$a bra \t$target;",
2805                            []>;
2806
        // GOTO: unconditional branch, printed as "bra.uni"; matches br.
2807    let isBranch=1, isBarrier=1 in
2808       def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
2809                         "bra.uni \t$target;",
2810                   [(br bb:$target)]>;
2811 }
2812
     // brcond whose condition lives in Int32Regs: the condition is passed
     // through ISetUNEi32ri_p together with the immediate 0, and the result is
     // used as CBranch's predicate operand.
     // NOTE(review): ISetUNEi32ri_p is defined outside this section; presumably
     // it yields an i1 "not equal to 0" predicate -- confirm.
2813 def : Pat<(brcond Int32Regs:$a, bb:$target), (CBranch
2814     (ISetUNEi32ri_p Int32Regs:$a, 0), bb:$target)>;
2815
2816 // SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
2817 // conditional branch if
2818 // the target block is the next block so that the code can fall through to the
2819 // target block.
2820 // The inversion is done by 'xor condition, 1', which will be translated to
2821 // (setne condition, -1).
2822 // Since ptx supports '@!pred bra target', we should use it.
2823 def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
2824   (CBranchOther Int1Regs:$a, bb:$target)>;
2825
2826 // Call
     // Type profiles for the call-sequence markers: the start marker constrains
     // operand 0 to i32, the end marker constrains operands 0 and 1 to i32.
2827 def SDT_NVPTXCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
2828 def SDT_NVPTXCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
2829                                         SDTCisVT<1, i32> ]>;
2830
     // DAG nodes for ISD::CALLSEQ_START / ISD::CALLSEQ_END using the profiles
     // above. Both are chained, produce glue and are marked as having side
     // effects; callseq_end additionally accepts an optional incoming glue.
2831 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
2832                            [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
2833 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_NVPTXCallSeqEnd,
2834                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
2835                            SDNPSideEffect]>;
2836
     // NVPTXISD::CALL node: no results and one operand constrained to i32;
     // chained, with optional incoming glue and outgoing glue.
2837 def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
2838 def call          : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
2839                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
     // Operand type for the callee of CALL (an i32 operand).
2840 def calltarget : Operand<i32>;
     // CALL prints "call \t$dst, (1);" and carries no pattern of its own.
2841 let isCall=1 in {
2842    def CALL : NVPTXInst<(outs), (ins calltarget:$dst),
2843                   "call \t$dst, (1);", []>;
2844 }
2845
     // Select the `call` node to CALL when the callee is a target global
     // address or a target external symbol.
2846 def : Pat<(call tglobaladdr:$dst),
2847           (CALL tglobaladdr:$dst)>;
2848 def : Pat<(call texternalsym:$dst),
2849           (CALL texternalsym:$dst)>;
2850
2851 // Pseudo instructions.
     // Thin alias of NVPTXInst: forwards outs, ins, asmstr and pattern
     // unchanged and adds no fields of its own.
2852 class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
2853    : NVPTXInst<outs, ins, asmstr, pattern>;
2854
2855 // @TODO: We use some tricks here to emit curly braces.  Can we clean this up
2856 // a bit without TableGen modifications?
     // Callseq_Start matches callseq_start and prints a "// Callseq Start $amt"
     // comment, a brace, and a ".reg .b32 temp_param_reg;" declaration.
     // Callseq_End matches callseq_end and prints a brace followed by a
     // "// Callseq End $amt1" comment; $amt2 is not printed.
     // Each string keeps its doubled braces paired, but in Callseq_Start the
     // closing pair follows "// <end>" on the same output line, and in
     // Callseq_End the opening pair follows "//".
     // NOTE(review): presumably this leaves one effective open brace at call
     // start and one effective close brace at call end, the partner of each
     // falling inside an emitted line comment -- confirm against printed output.
2857 def Callseq_Start : NVPTXInst<(outs), (ins i32imm:$amt),
2858   "// Callseq Start $amt\n\t{{\n\t.reg .b32 temp_param_reg;\n\t// <end>}}",
2859                                [(callseq_start timm:$amt)]>;
2860 def Callseq_End : NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
2861   "\n\t//{{\n\t}}// Callseq End $amt1",
2862                             [(callseq_end timm:$amt1, timm:$amt2)]>;
2863
2864 // trap instruction
     // Takes no operands, prints "trap;" and matches the `trap` node.
2865
2866 def trapinst : NVPTXInst<(outs), (ins),
2867                          "trap;",
2868                          [(trap)]>;
2869
2870 include "NVPTXIntrinsics.td"
2871
2872
2873 //-----------------------------------
2874 // Notes
2875 //-----------------------------------
2876 // BSWAP is currently expanded. The following would be more efficient:
2877 // - for < sm_20, use vector scalar mov, as Tesla supports native 16-bit registers
2878 // - for sm_20, use prmt (use vector scalar mov to get the pack and
2879 //   unpack). sm_20 supports native 32-bit registers, but not native 16-bit
2880 //   registers.