//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
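
  // A note on "preferred slots": every SPU register is 128 bits wide, and a
  // scalar value occupies a fixed byte range within that quadword. The
  // prefslot_byte field above records where that range starts: words and
  // larger start at byte 0, halfwords occupy bytes 2-3 (prefslot_byte = 2),
  // and single bytes sit at byte 3. Load lowering rotates the fetched
  // quadword so the requested value lands in its preferred slot.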

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }

  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8, MVT::i1, Custom);
  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root, so expand fsqrt:
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported
  // in the instruction selector.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does have CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
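
  // The custom CTPOP lowering is built around SPUISD::CNTB (the cntb
  // instruction, which counts the one bits in each byte of a quadword); the
  // per-byte counts are then summed into the requested integer width.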

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Promote);
  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be
  // custom lowered.
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
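
  // Expanding BUILD_PAIR (an i64 assembled from two i32 halves, with the low
  // half as operand 0) yields the generic shift/or sequence, roughly:
  // (zext(hi) << 32) | zext(lo).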

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND   , VT, Legal);
    setOperationAction(ISD::OR    , VT, Legal);
    setOperationAction(ISD::XOR   , VT, Legal);
    setOperationAction(ISD::LOAD  , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE , VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
      = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (rounded down to a multiple of 16)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot (modulo 16 != 0)
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store is encountered
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

 Both load and store lowering load a block of data aligned on a 16-byte
 boundary. This is the common aligned load code shared between both.
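
 Worked example (illustrative): an i32 load from SP+18 becomes a v16i8 load of
 the chunk at SP+16. alignOffs is 18, so prefSlotOffs starts as 18 & 0xf = 2;
 an i32's preferred slot begins at byte 0, so prefSlotOffs stays 2 and the
 caller rotates the chunk left two bytes to bring the word into bytes 0-3.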
 */
static SDValue
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
{
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
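
 For example, an i16 whose chunk offset is 6 sits in bytes 6-7 of the loaded
 quadword; its preferred slot is bytes 2-3, so the chunk is rotated left by
 6 - 2 = 4 bytes before the scalar is extracted (illustrative walkthrough).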
 */
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.getNode()->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDValue Ops[8];

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDValue result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT,
                  was16aligned);

    if (result.getNode() == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      Ops[0] = the_chain;
      Ops[1] = result;
      if (was16aligned) {
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT)
        vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
      else
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT vecVT = MVT::v16i8, stVecVT = MVT::v16i8;

    if (StVT != MVT::i1) {
      stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
      vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
    }

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }

    SDValue insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
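
    // INSERT_MASK selects to the cwd/chd/cbd family of instructions: it
    // yields a shuffle control word that routes every byte of the old chunk
    // through unchanged except the bytes of the target slot, which are taken
    // from the scalar operand. The SHUFB below then merges the new value
    // into the loaded chunk, turning the store into read-modify-write.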

    SDValue vectorizeOp;

    if (VT == MVT::i1 || StVT != VT) {
      MVT toVT = (VT != MVT::i1) ? VT : MVT::i8;
      if (toVT.bitsGT(VT)) {
        vectorizeOp = DAG.getNode(ISD::ANY_EXTEND, toVT, theValue);
      } else if (StVT.bitsLT(VT)) {
        vectorizeOp = DAG.getNode(ISD::TRUNCATE, toVT, theValue);
      }

      vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, vectorizeOp);
    } else {
      vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
    }

    result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
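
  // Addressing-mode note: in the small memory model the whole 256K local
  // store is reachable through an absolute 18-bit address, so a single
  // A-form address node suffices; the large memory model splits the address
  // into Hi/Lo halves that feed an indirect (X-form) access.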

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());

  if (VT == MVT::i64) {
    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDValue();
}

//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  MVT CondNVT;

  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
    return SDValue(); // Unchanged
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // If the function takes a variable number of arguments, spill the remaining
  // argument registers to the stack.
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
                            ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
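
// For example, 0x1FFFC is word aligned and sign-extends from 18 bits back to
// itself, so isLSAAddress returns the word offset 0x1FFFC >> 2 = 0x7FFF,
// while 0x1FFFD fails the alignment test and yields null.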

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;               // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
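///
/// The test collapses the constant in halves: the two 64-bit pieces must
/// agree (ignoring undefs), then the 32-bit halves of those, and so on down
/// to bytes. A vector of 0xABCDABCD words, for instance, collapses through
/// 64 and 32 bits, stops at 16, and reports SplatBits = 0xABCD, SplatSize = 2.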
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // If the top 16-bits are different from the lower 16-bits, ignoring
          // undefs, we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are different from the lower 8-bits, ignoring
              // undefs, we have an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default: abort();
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
    } else {
      SDValue LO32;
      SDValue HI32;
      SmallVector<SDValue, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
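
      // These special patterns map onto shufb's special control bytes: a
      // control byte of the form 10xxxxxx produces 0x00 in the result,
      // 110xxxxx produces 0xFF, and 1110xxxx produces 0x80. That is why the
      // mask loop below emits 0x80 for zero words, 0xC0 for 0xFFFFFFFF, and
      // 0xE0 (high byte only) for 0x80000000.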

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;

      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDValue Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;

        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          val <<= 8;
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val |= 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val |= 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
          } else
            val |= i * 4 + j + ((i & 1) * 16);
        }

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }
1730 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1731 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1732 &ShufBytes[0], ShufBytes.size()));
1733 }
1734 }
1735 }
1737 return SDValue();
1738 }
1740 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1741 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1742 /// permutation vector, V3, is monotonically increasing with one "exception"
1743 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1744 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1745 /// In either case, the net result is going to eventually invoke SHUFB to
1746 /// permute/shuffle the bytes from V1 and V2.
1748 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1749 /// generate a control word for byte/halfword/word insertion. This takes care
1750 /// of a single element move from V2 into V1.
1752 /// SPUISD::SHUFB is eventually selected as the Cell's <i>shufb</i> instruction.
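/// Example (illustrative only): for v4i32 operands, a permutation vector of
/// (0, 1, 6, 3) is monotonic with the single exception element 6; it selects
/// element 2 of V2, so the C*D path below emits INSERT_MASK plus one SHUFB.
/// A vector such as (0, 2, 1, 3) breaks monotonicity and falls through to
/// the general byte-mask/SHUFB expansion.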
1753 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1754 SDValue V1 = Op.getOperand(0);
1755 SDValue V2 = Op.getOperand(1);
1756 SDValue PermMask = Op.getOperand(2);
1758 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1760 // If we have a single element being moved from V1 to V2, this can be handled
1761 // using the C*[DX] compute mask instructions, but the vector elements have
1762 // to be monotonically increasing with one exception element.
1763 MVT EltVT = V1.getValueType().getVectorElementType();
1764 unsigned EltsFromV2 = 0;
1765 unsigned V2Elt = 0;
1766 unsigned V2EltIdx0 = 0;
1767 unsigned CurrElt = 0;
1768 bool monotonic = true;
1769 if (EltVT == MVT::i8)
1770 V2EltIdx0 = 16;
1771 else if (EltVT == MVT::i16)
1772 V2EltIdx0 = 8;
1773 else if (EltVT == MVT::i32)
1774 V2EltIdx0 = 4;
1775 else
1776 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1778 for (unsigned i = 0, e = PermMask.getNumOperands();
1779 EltsFromV2 <= 1 && monotonic && i != e;
1780 ++i) {
1781 unsigned SrcElt;
1782 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1783 SrcElt = 0;
1784 else
1785 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1787 if (SrcElt >= V2EltIdx0) {
1788 ++EltsFromV2;
1789 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1790 } else if (CurrElt != SrcElt) {
1791 monotonic = false;
1792 }
1794 ++CurrElt;
1795 }
1797 if (EltsFromV2 == 1 && monotonic) {
1798 // Compute mask and shuffle
1799 MachineFunction &MF = DAG.getMachineFunction();
1800 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1801 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1802 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1803 // Initialize temporary register to 0
1804 SDValue InitTempReg =
1805 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1806 // Copy register's contents as index in INSERT_MASK:
1807 SDValue ShufMaskOp =
1808 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1809 DAG.getTargetConstant(V2Elt, MVT::i32),
1810 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
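// Note on operand order (as I read the C-form control word): the
// cwd/chd/cbd-generated mask selects bytes from the *second* shuffle input
// everywhere except at the insertion slot, which is filled from the first
// input. Passing (V2, V1, mask) below therefore drops the single V2 element
// into an otherwise unchanged V1.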
1811 // Use shuffle mask in SHUFB synthetic instruction:
1812 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1813 } else {
1814 // Convert the SHUFFLE_VECTOR mask's input element units to the
1815 // actual bytes.
1816 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1818 SmallVector<SDValue, 16> ResultMask;
1819 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1820 unsigned SrcElt;
1821 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1822 SrcElt = 0;
1823 else
1824 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1826 for (unsigned j = 0; j < BytesPerElement; ++j) {
1827 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1828 MVT::i8));
1829 }
1830 }
1832 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1833 &ResultMask[0], ResultMask.size());
1834 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1835 }
1836 }
1838 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1839 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1841 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1842 // For a constant, build the appropriate constant vector, which will
1843 // eventually simplify to a vector register load.
1845 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1846 SmallVector<SDValue, 16> ConstVecValues;
1847 MVT VT;
1848 size_t n_copies;
1850 // Create a constant vector:
1851 switch (Op.getValueType().getSimpleVT()) {
1852 default: assert(0 && "Unexpected constant value type in "
1853 "LowerSCALAR_TO_VECTOR");
1854 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1855 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1856 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1857 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1858 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1859 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1862 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1863 for (size_t j = 0; j < n_copies; ++j)
1864 ConstVecValues.push_back(CValue);
1866 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1867 &ConstVecValues[0], ConstVecValues.size());
1868 } else {
1869 // Otherwise, copy the value from one register to another:
1870 switch (Op0.getValueType().getSimpleVT()) {
1871 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1872 case MVT::i8:
1873 case MVT::i16:
1874 case MVT::i32:
1875 case MVT::i64:
1876 case MVT::f32:
1877 case MVT::f64:
1878 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1879 }
1880 }
1881 }
1885 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1886 switch (Op.getValueType().getSimpleVT()) {
1887 default:
1888 cerr << "CellSPU: Unknown vector multiplication, got "
1889 << Op.getValueType().getMVTString()
1890 << "\n";
1891 abort();
1892 /*NOTREACHED*/
1894 case MVT::v4i32: {
1895 SDValue rA = Op.getOperand(0);
1896 SDValue rB = Op.getOperand(1);
1897 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1898 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1899 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1900 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
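// Why three partial products suffice (sketch): with a = (a_hi << 16) + a_lo
// and b likewise, a*b mod 2^32 = ((a_hi*b_lo + b_hi*a_lo) << 16) + a_lo*b_lo.
// MPYH(rA, rB) yields (a_hi*b_lo) << 16, MPYH(rB, rA) the symmetric term,
// and MPYU the unsigned a_lo*b_lo, so the two ADDs accumulate exactly those
// three terms in every 32-bit lane.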
1902 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1903 }
1905 case MVT::v8i16: {
1906 // Multiply two v8i16 vectors (pipeline friendly version):
1907 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1908 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1909 // c) Use SELB to select upper and lower halves from the intermediate results
1911 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1912 // dual-issue. This code does manage to do this, even if it's a little on
1913 // the wacky side.
1915 MachineFunction &MF = DAG.getMachineFunction();
1916 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1917 SDValue Chain = Op.getOperand(0);
1918 SDValue rA = Op.getOperand(0);
1919 SDValue rB = Op.getOperand(1);
1920 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1921 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1923 SDValue FSMBOp =
1924 DAG.getCopyToReg(Chain, FSMBIreg,
1925 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1926 DAG.getConstant(0xcccc, MVT::i16)));
1928 SDValue HHProd =
1929 DAG.getCopyToReg(FSMBOp, HiProdReg,
1930 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1932 SDValue HHProd_v4i32 =
1933 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1934 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1936 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1937 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1938 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1939 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1940 HHProd_v4i32,
1941 DAG.getConstant(16, MVT::i16))),
1942 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
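// Selection sketch for the return above: SELECT_MASK (fsmbi) expands each
// bit of 0xcccc (1100110011001100b) into a whole mask byte, marking bytes
// 0-1 of every 4-byte word. Assuming SELB takes from its second operand
// where the mask is ones, the high halfword of each word comes from the
// left-shifted MPYHH result and the low halfword from MPY, reassembling all
// eight 16-bit products.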
1943 }
1944 case MVT::v16i8: {
1945 // This M00sE is N@stI! (apologies to Monty Python)
1947 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1948 // is to break it all apart, sign extend, and reassemble the various
1949 // intermediate products.
1951 SDValue rA = Op.getOperand(0);
1952 SDValue rB = Op.getOperand(1);
1953 SDValue c8 = DAG.getConstant(8, MVT::i32);
1954 SDValue c16 = DAG.getConstant(16, MVT::i32);
1956 SDValue LLProd =
1957 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1958 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1959 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1961 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1963 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1965 SDValue LHProd =
1966 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1967 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1969 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1970 DAG.getConstant(0x2222, MVT::i16));
1972 SDValue LoProdParts =
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1974 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1975 LLProd, LHProd, FSMBmask));
1977 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1979 SDValue LoProd =
1980 DAG.getNode(ISD::AND, MVT::v4i32,
1981 LoProdParts,
1982 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1983 LoProdMask, LoProdMask,
1984 LoProdMask, LoProdMask));
1986 SDValue rAH =
1987 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1990 SDValue rBH =
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1994 SDValue HLProd =
1995 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1996 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1997 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1999 SDValue HHProd_1 =
2000 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2001 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2002 DAG.getNode(SPUISD::VEC_SRA,
2003 MVT::v4i32, rAH, c8)),
2004 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2005 DAG.getNode(SPUISD::VEC_SRA,
2006 MVT::v4i32, rBH, c8)));
2008 SDValue HHProd =
2009 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2010 HLProd,
2011 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2012 FSMBmask);
2014 SDValue HiProd =
2015 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2017 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2018 DAG.getNode(ISD::OR, MVT::v4i32,
2019 LoProd, HiProd));
2020 }
2021 }
2023 return SDValue();
2024 }
2026 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2027 MachineFunction &MF = DAG.getMachineFunction();
2028 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2030 SDValue A = Op.getOperand(0);
2031 SDValue B = Op.getOperand(1);
2032 MVT VT = Op.getValueType();
2034 unsigned VRegBR, VRegC;
2036 if (VT == MVT::f32) {
2037 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2038 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2039 } else {
2040 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2041 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2042 }
2043 // TODO: make sure we're feeding FPInterp the right arguments
2044 // Right now: fi B, frest(B)
2046 // Computes BRcpl =
2047 // (Floating Interpolate (FP Reciprocal Estimate B))
2048 SDValue BRcpl =
2049 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2050 DAG.getNode(SPUISD::FPInterp, VT, B,
2051 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2053 // Computes A * BRcpl and stores in a temporary register
2054 SDValue AxBRcpl =
2055 DAG.getCopyToReg(BRcpl, VRegC,
2056 DAG.getNode(ISD::FMUL, VT, A,
2057 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2058 // What's the Chain variable do? It's magic!
2059 // TODO: set Chain = Op(0).getEntryNode()
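// Derivation for the return below (assuming FPRecipEst/FPInterp produce
// r ~= 1/B): with q0 = A*r (AxBRcpl), the result is
//   q1 = q0 + r*(A - B*q0)
// i.e. one Newton-style refinement step. The residual A - B*q0 measures how
// far q0 is from A/B, and scaling it by r roughly squares the relative
// accuracy of the initial estimate.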
2061 return DAG.getNode(ISD::FADD, VT,
2062 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2063 DAG.getNode(ISD::FMUL, VT,
2064 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2065 DAG.getNode(ISD::FSUB, VT, A,
2066 DAG.getNode(ISD::FMUL, VT, B,
2067 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2068 }
2070 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2071 MVT VT = Op.getValueType();
2072 SDValue N = Op.getOperand(0);
2073 SDValue Elt = Op.getOperand(1);
2074 SDValue ShufMask[16];
2075 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2077 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2079 int EltNo = (int) C->getZExtValue();
2082 if (VT == MVT::i8 && EltNo >= 16)
2083 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2084 else if (VT == MVT::i16 && EltNo >= 8)
2085 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2086 else if (VT == MVT::i32 && EltNo >= 4)
2087 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2088 else if (VT == MVT::i64 && EltNo >= 2)
2089 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2091 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2092 // i32 and i64: Element 0 is the preferred slot
2093 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2094 }
2096 // Need to generate shuffle mask and extract:
2097 int prefslot_begin = -1, prefslot_end = -1;
2098 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2100 switch (VT.getSimpleVT()) {
2101 default:
2102 assert(false && "Invalid value type!");
2103 case MVT::i8: {
2104 prefslot_begin = prefslot_end = 3;
2105 break;
2106 }
2107 case MVT::i16: {
2108 prefslot_begin = 2; prefslot_end = 3;
2109 break;
2110 }
2111 case MVT::i32:
2112 case MVT::f32: {
2113 prefslot_begin = 0; prefslot_end = 3;
2114 break;
2115 }
2116 case MVT::i64:
2117 case MVT::f64: {
2118 prefslot_begin = 0; prefslot_end = 7;
2119 break;
2120 }
2121 }
2123 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2124 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2126 for (int i = 0; i < 16; ++i) {
2127 // zero fill upper part of preferred slot, don't care about the
2128 // rest (undefined)
2129 unsigned int mask_val;
2131 if (i <= prefslot_end) {
2132 mask_val =
2133 ((i < prefslot_begin)
2134 ? 0x80
2135 : elt_byte + (i - prefslot_begin));
2137 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2138 } else
2139 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2140 }
2142 SDValue ShufMaskVec =
2143 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2144 &ShufMask[0],
2145 sizeof(ShufMask) / sizeof(ShufMask[0]));
2147 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2148 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2149 N, N, ShufMaskVec));
2150 }
2153 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2154 SDValue VecOp = Op.getOperand(0);
2155 SDValue ValOp = Op.getOperand(1);
2156 SDValue IdxOp = Op.getOperand(2);
2157 MVT VT = Op.getValueType();
2159 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2160 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2162 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2163 // Use $2 because it's always 16-byte aligned and it's available:
2164 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2166 SDValue result =
2167 DAG.getNode(SPUISD::SHUFB, VT,
2168 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2169 VecOp,
2170 DAG.getNode(SPUISD::INSERT_MASK, VT,
2171 DAG.getNode(ISD::ADD, PtrVT,
2172 PtrBase,
2173 DAG.getConstant(CN->getZExtValue(),
2174 PtrVT))));
2176 return result;
2177 }
2179 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2180 {
2181 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2183 assert(Op.getValueType() == MVT::i8);
2184 switch (Opc) {
2185 default:
2186 assert(0 && "Unhandled i8 math operator");
2187 /*NOTREACHED*/
2188 break;
2189 case ISD::SUB: {
2190 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2191 // the result:
2192 SDValue N1 = Op.getOperand(1);
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2196 MVT::i16));
2197 N1 = (N1.getOpcode() != ISD::Constant
2198 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2199 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2200 MVT::i16));
2201 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2202 DAG.getNode(Opc, MVT::i16, N0, N1));
2203 }
2204 case ISD::ROTR:
2205 case ISD::ROTL: {
2206 SDValue N1 = Op.getOperand(1);
2207 unsigned N1Opc;
2208 N0 = (N0.getOpcode() != ISD::Constant
2209 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2210 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2211 MVT::i16));
2212 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2213 ? ISD::ZERO_EXTEND
2214 : ISD::TRUNCATE;
2215 N1 = (N1.getOpcode() != ISD::Constant
2216 ? DAG.getNode(N1Opc, MVT::i32, N1)
2217 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2218 MVT::i32));
2219 SDValue ExpandArg =
2220 DAG.getNode(ISD::OR, MVT::i16, N0,
2221 DAG.getNode(ISD::SHL, MVT::i16,
2222 N0, DAG.getConstant(8, MVT::i32)));
2223 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2224 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2225 }
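// Note on the ROTR/ROTL expansion above: OR'ing N0 with (N0 << 8)
// replicates the byte into both halves of the i16, so a 16-bit rotate by k
// (0 <= k < 8) followed by TRUNCATE observes exactly the bytes an 8-bit
// rotate would produce, since the doubled pattern has period 8.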
2226 case ISD::SRL:
2227 case ISD::SHL: {
2228 SDValue N1 = Op.getOperand(1);
2229 unsigned N1Opc;
2230 N0 = (N0.getOpcode() != ISD::Constant
2231 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2232 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2233 MVT::i16));
2234 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2235 ? ISD::ZERO_EXTEND
2236 : ISD::TRUNCATE;
2237 N1 = (N1.getOpcode() != ISD::Constant
2238 ? DAG.getNode(N1Opc, MVT::i16, N1)
2239 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2240 MVT::i16));
2241 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2242 DAG.getNode(Opc, MVT::i16, N0, N1));
2243 }
2244 case ISD::SRA: {
2245 SDValue N1 = Op.getOperand(1);
2246 unsigned N1Opc;
2247 N0 = (N0.getOpcode() != ISD::Constant
2248 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2249 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2250 MVT::i16));
2251 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2252 ? ISD::SIGN_EXTEND
2253 : ISD::TRUNCATE;
2254 N1 = (N1.getOpcode() != ISD::Constant
2255 ? DAG.getNode(N1Opc, MVT::i16, N1)
2256 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2257 MVT::i16));
2258 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2259 DAG.getNode(Opc, MVT::i16, N0, N1));
2260 }
2261 case ISD::MUL: {
2262 SDValue N1 = Op.getOperand(1);
2263 unsigned N1Opc;
2264 N0 = (N0.getOpcode() != ISD::Constant
2265 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2266 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2267 MVT::i16));
2268 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2269 N1 = (N1.getOpcode() != ISD::Constant
2270 ? DAG.getNode(N1Opc, MVT::i16, N1)
2271 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2272 MVT::i16));
2273 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2274 DAG.getNode(Opc, MVT::i16, N0, N1));
2275 }
2276 }
2278 return SDValue();
2279 }
2282 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2283 {
2284 MVT VT = Op.getValueType();
2285 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2287 SDValue Op0 = Op.getOperand(0);
2289 switch (Opc) {
2290 case ISD::ZERO_EXTEND:
2291 case ISD::SIGN_EXTEND:
2292 case ISD::ANY_EXTEND: {
2293 MVT Op0VT = Op0.getValueType();
2294 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2296 assert(Op0VT == MVT::i32
2297 && "CellSPU: Zero/sign extending something other than i32");
2298 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2300 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2301 ? SPUISD::ROTBYTES_RIGHT_S
2302 : SPUISD::ROTQUAD_RZ_BYTES);
2303 SDValue PromoteScalar =
2304 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2306 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2307 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2308 DAG.getNode(NewOpc, Op0VecVT,
2309 PromoteScalar,
2310 DAG.getConstant(4, MVT::i32))));
2311 }
2313 case ISD::ADD: {
2314 // Turn operands into vectors to satisfy type checking (shufb works on
2315 // vectors)
2316 SDValue Op0 =
2317 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2318 SDValue Op1 =
2319 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2320 SmallVector<SDValue, 16> ShufBytes;
2322 // Create the shuffle mask for "rotating" the carry up one register slot
2323 // once the carry is generated.
2324 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2325 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2326 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2327 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2329 SDValue CarryGen =
2330 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2331 SDValue ShiftedCarry =
2332 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2333 CarryGen, CarryGen,
2334 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2335 &ShufBytes[0], ShufBytes.size()));
2337 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2338 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2339 Op0, Op1, ShiftedCarry));
2340 }
2342 case ISD::SUB: {
2343 // Turn operands into vectors to satisfy type checking (shufb works on
2344 // vectors)
2345 SDValue Op0 =
2346 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2347 SDValue Op1 =
2348 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2349 SmallVector<SDValue, 16> ShufBytes;
2351 // Create the shuffle mask for "rotating" the borrow up one register slot
2352 // once the borrow is generated.
2353 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2354 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2355 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2356 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2358 SDValue BorrowGen =
2359 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2360 SDValue ShiftedBorrow =
2361 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2362 BorrowGen, BorrowGen,
2363 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2364 &ShufBytes[0], ShufBytes.size()));
2366 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2367 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2368 Op0, Op1, ShiftedBorrow));
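// In both the ADD and SUB expansions above, CARRY_GENERATE/BORROW_GENERATE
// produce a per-32-bit-word carry or borrow, while ADD_EXTENDED/SUB_EXTENDED
// consume it one slot higher. The shuffle masks rotate word 1 into word 0
// (bytes 0x04050607) and word 3 into word 2 (bytes 0x0c0d0e0f); the filler
// differs, as 0x80 bytes inject zeroes for the add while (as I read the
// shufb encoding) 0xc0 bytes inject 0xff words, the "no borrow" value the
// extended subtract expects.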
2369 }
2371 case ISD::SHL: {
2372 SDValue ShiftAmt = Op.getOperand(1);
2373 MVT ShiftAmtVT = ShiftAmt.getValueType();
2374 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2375 SDValue MaskLower =
2376 DAG.getNode(SPUISD::SELB, VecVT,
2377 Op0Vec,
2378 DAG.getConstant(0, VecVT),
2379 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2380 DAG.getConstant(0xff00ULL, MVT::i16)));
2381 SDValue ShiftAmtBytes =
2382 DAG.getNode(ISD::SRL, ShiftAmtVT,
2383 ShiftAmt,
2384 DAG.getConstant(3, ShiftAmtVT));
2385 SDValue ShiftAmtBits =
2386 DAG.getNode(ISD::AND, ShiftAmtVT,
2387 ShiftAmt,
2388 DAG.getConstant(7, ShiftAmtVT));
2390 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2391 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2392 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2393 MaskLower, ShiftAmtBytes),
2394 ShiftAmtBits));
2395 }
2397 case ISD::SRL: {
2398 MVT VT = Op.getValueType();
2399 SDValue ShiftAmt = Op.getOperand(1);
2400 MVT ShiftAmtVT = ShiftAmt.getValueType();
2401 SDValue ShiftAmtBytes =
2402 DAG.getNode(ISD::SRL, ShiftAmtVT,
2403 ShiftAmt,
2404 DAG.getConstant(3, ShiftAmtVT));
2405 SDValue ShiftAmtBits =
2406 DAG.getNode(ISD::AND, ShiftAmtVT,
2407 ShiftAmt,
2408 DAG.getConstant(7, ShiftAmtVT));
2410 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2411 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2412 Op0, ShiftAmtBytes),
2413 ShiftAmtBits);
2414 }
2416 case ISD::SRA: {
2417 // Promote Op0 to vector
2418 SDValue Op0 =
2419 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2420 SDValue ShiftAmt = Op.getOperand(1);
2421 MVT ShiftVT = ShiftAmt.getValueType();
2423 // Negate variable shift amounts
2424 if (!isa<ConstantSDNode>(ShiftAmt)) {
2425 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2426 DAG.getConstant(0, ShiftVT), ShiftAmt);
2427 }
2429 SDValue UpperHalfSign =
2430 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2431 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2432 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2433 Op0, DAG.getConstant(31, MVT::i32))));
2434 SDValue UpperHalfSignMask =
2435 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2436 SDValue UpperLowerMask =
2437 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2438 DAG.getConstant(0xff00, MVT::i16));
2439 SDValue UpperLowerSelect =
2440 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2441 UpperHalfSignMask, Op0, UpperLowerMask);
2442 SDValue RotateLeftBytes =
2443 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2444 UpperLowerSelect, ShiftAmt);
2445 SDValue RotateLeftBits =
2446 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2447 RotateLeftBytes, ShiftAmt);
2449 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2450 RotateLeftBits);
2451 }
2452 }
2454 return SDValue();
2455 }
2457 //! Lower byte immediate operations for v16i8 vectors:
2458 static SDValue
2459 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2460 SDValue ConstVec;
2461 SDValue Arg;
2462 MVT VT = Op.getValueType();
2464 ConstVec = Op.getOperand(0);
2465 Arg = Op.getOperand(1);
2466 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2467 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2468 ConstVec = ConstVec.getOperand(0);
2469 } else {
2470 ConstVec = Op.getOperand(1);
2471 Arg = Op.getOperand(0);
2472 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2473 ConstVec = ConstVec.getOperand(0);
2474 }
2475 }
2476 }
2478 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2479 uint64_t VectorBits[2];
2480 uint64_t UndefBits[2];
2481 uint64_t SplatBits, SplatUndef;
2482 int SplatSize;
2484 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2485 && isConstantSplat(VectorBits, UndefBits,
2486 VT.getVectorElementType().getSizeInBits(),
2487 SplatBits, SplatUndef, SplatSize)) {
2488 SDValue tcVec[16];
2489 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2490 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2492 // Turn the BUILD_VECTOR into a set of target constants:
2493 for (size_t i = 0; i < tcVecSize; ++i)
2494 tcVec[i] = tc;
2496 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2497 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
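// Rationale (sketch): the byte-immediate forms (andbi, orbi, xorbi)
// replicate one 8-bit immediate across all 16 bytes of a register, so
// re-expressing the splatted BUILD_VECTOR as TargetConstants lets the
// instruction selector fold the entire vector operand into the
// instruction's immediate field.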
2498 }
2499 }
2500 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2501 // lowered. Return the operation, rather than a null SDValue.
2502 return Op;
2503 }
2505 //! Lower i32 multiplication
2506 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2507 unsigned Opc) {
2508 switch (VT.getSimpleVT()) {
2510 cerr << "CellSPU: Unknown LowerMUL value type, got "
2511 << Op.getValueType().getMVTString()
2517 SDValue rA = Op.getOperand(0);
2518 SDValue rB = Op.getOperand(1);
2520 return DAG.getNode(ISD::ADD, MVT::i32,
2521 DAG.getNode(ISD::ADD, MVT::i32,
2522 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2523 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2524 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2525 }
2526 }
2528 return SDValue();
2529 }
2531 //! Custom lowering for CTPOP (count population)
2532 /*!
2533 Custom lowering code that counts the number of ones in the input
2534 operand. SPU has such an instruction, but it counts the number of
2535 ones per byte, which then have to be accumulated.
2536 */
2537 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2538 MVT VT = Op.getValueType();
2539 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2541 switch (VT.getSimpleVT()) {
2542 default:
2543 assert(false && "Invalid value type!");
2544 case MVT::i8: {
2545 SDValue N = Op.getOperand(0);
2546 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2548 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2549 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2551 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2552 }
2554 case MVT::i16: {
2555 MachineFunction &MF = DAG.getMachineFunction();
2556 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2558 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2560 SDValue N = Op.getOperand(0);
2561 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2562 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2563 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2565 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2566 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2568 // CNTB_result becomes the chain to which all of the virtual registers
2569 // CNTB_reg, SUM1_reg become associated:
2570 SDValue CNTB_result =
2571 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2573 SDValue CNTB_rescopy =
2574 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2576 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2578 return DAG.getNode(ISD::AND, MVT::i16,
2579 DAG.getNode(ISD::ADD, MVT::i16,
2580 DAG.getNode(ISD::SRL, MVT::i16,
2581 Tmp1, Shift1),
2582 Tmp1),
2583 Mask0);
2584 }
2586 case MVT::i32: {
2587 MachineFunction &MF = DAG.getMachineFunction();
2588 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2590 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2591 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2593 SDValue N = Op.getOperand(0);
2594 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2595 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2596 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2597 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2599 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2600 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2602 // CNTB_result becomes the chain to which all of the virtual registers
2603 // CNTB_reg, SUM1_reg become associated:
2604 SDValue CNTB_result =
2605 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2607 SDValue CNTB_rescopy =
2608 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2610 SDValue Comp1 =
2611 DAG.getNode(ISD::SRL, MVT::i32,
2612 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2614 SDValue Sum1 =
2615 DAG.getNode(ISD::ADD, MVT::i32,
2616 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2618 SDValue Sum1_rescopy =
2619 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2621 SDValue Comp2 =
2622 DAG.getNode(ISD::SRL, MVT::i32,
2623 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2624 Shift2);
2625 SDValue Sum2 =
2626 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2627 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
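// Accumulation sketch for this i32 case, in scalar terms:
//   t = per-byte counts from CNTB;     // four counts b3..b0, each <= 8
//   t = (t >> 16) + t;                 // fold upper halfword onto lower
//   t = ((t >> 8) + t) & 0xff;         // fold last byte; yields b0+b1+b2+b3
// The partial sums never exceed 32, so no fold can carry into a
// neighbouring byte.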
2629 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2630 }
2632 case MVT::i64:
2633 break;
2634 }
2636 return SDValue();
2637 }
2639 /// LowerOperation - Provide custom lowering hooks for some operations.
2641 SDValue
2642 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2643 {
2644 unsigned Opc = (unsigned) Op.getOpcode();
2645 MVT VT = Op.getValueType();
2647 switch (Opc) {
2648 default: {
2649 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2650 cerr << "Op.getOpcode() = " << Opc << "\n";
2651 cerr << "*Op.getNode():\n";
2652 Op.getNode()->dump();
2653 abort();
2654 }
2655 case ISD::LOAD:
2656 case ISD::SEXTLOAD:
2657 case ISD::ZEXTLOAD:
2658 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2659 case ISD::STORE:
2660 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2661 case ISD::ConstantPool:
2662 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2663 case ISD::GlobalAddress:
2664 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2665 case ISD::JumpTable:
2666 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2667 case ISD::Constant:
2668 return LowerConstant(Op, DAG);
2669 case ISD::ConstantFP:
2670 return LowerConstantFP(Op, DAG);
2671 case ISD::BRCOND:
2672 return LowerBRCOND(Op, DAG);
2673 case ISD::FORMAL_ARGUMENTS:
2674 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2675 case ISD::CALL:
2676 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2677 case ISD::RET:
2678 return LowerRET(Op, DAG, getTargetMachine());
2681 // i8, i64 math ops:
2682 case ISD::ZERO_EXTEND:
2683 case ISD::SIGN_EXTEND:
2684 case ISD::ANY_EXTEND:
2685 case ISD::ADD:
2686 case ISD::SUB:
2687 case ISD::ROTR:
2688 case ISD::ROTL:
2689 case ISD::SRL:
2690 case ISD::SHL:
2691 case ISD::SRA:
2692 if (VT == MVT::i8)
2693 return LowerI8Math(Op, DAG, Opc);
2694 else if (VT == MVT::i64)
2695 return LowerI64Math(Op, DAG, Opc);
2696 break;
2699 // Vector-related lowering.
2700 case ISD::BUILD_VECTOR:
2701 return LowerBUILD_VECTOR(Op, DAG);
2702 case ISD::SCALAR_TO_VECTOR:
2703 return LowerSCALAR_TO_VECTOR(Op, DAG);
2704 case ISD::VECTOR_SHUFFLE:
2705 return LowerVECTOR_SHUFFLE(Op, DAG);
2706 case ISD::EXTRACT_VECTOR_ELT:
2707 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2708 case ISD::INSERT_VECTOR_ELT:
2709 return LowerINSERT_VECTOR_ELT(Op, DAG);
2711 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2712 case ISD::AND:
2713 case ISD::OR:
2714 case ISD::XOR:
2715 return LowerByteImmed(Op, DAG);
2717 // Vector and i8 multiply:
2718 case ISD::MUL:
2719 if (VT.isVector())
2720 return LowerVectorMUL(Op, DAG);
2721 else if (VT == MVT::i8)
2722 return LowerI8Math(Op, DAG, Opc);
2723 else
2724 return LowerMUL(Op, DAG, VT, Opc);
2726 case ISD::FDIV:
2727 if (VT == MVT::f32 || VT == MVT::v4f32)
2728 return LowerFDIVf32(Op, DAG);
2729 // else if (Op.getValueType() == MVT::f64)
2730 // return LowerFDIVf64(Op, DAG);
2732 assert(0 && "Calling FDIV on unsupported MVT");
2734 case ISD::CTPOP:
2735 return LowerCTPOP(Op, DAG);
2736 }
2738 return SDValue();
2739 }
2741 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2742 {
2744 unsigned Opc = (unsigned) N->getOpcode();
2745 MVT OpVT = N->getValueType(0);
2747 switch (Opc) {
2748 default: {
2749 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2750 cerr << "Op.getOpcode() = " << Opc << "\n";
2751 cerr << "*Op.getNode():\n";
2752 N->dump();
2753 abort();
2754 /*NOTREACHED*/
2755 }
2756 }
2758 /* Otherwise, return unchanged */
2759 return 0;
2760 }
2763 //===----------------------------------------------------------------------===//
2764 // Target Optimization Hooks
2765 //===----------------------------------------------------------------------===//
2767 SDValue
2768 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2769 {
2771 TargetMachine &TM = getTargetMachine();
2773 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2774 SelectionDAG &DAG = DCI.DAG;
2775 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2776 SDValue Result; // Initially, NULL result
2778 switch (N->getOpcode()) {
2779 default: break;
2780 case ISD::ADD: {
2781 SDValue Op1 = N->getOperand(1);
2783 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2784 SDValue Op01 = Op0.getOperand(1);
2785 if (Op01.getOpcode() == ISD::Constant
2786 || Op01.getOpcode() == ISD::TargetConstant) {
2787 // (add <const>, (SPUindirect <arg>, <const>)) ->
2788 // (SPUindirect <arg>, <const + const>)
2789 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2790 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2791 SDValue combinedConst =
2792 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2793 Op0.getValueType());
2795 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2796 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2797 DEBUG(cerr << "With: (SPUindirect <arg>, "
2798 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2799 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2800 Op0.getOperand(0), combinedConst);
2801 }
2802 } else if (isa<ConstantSDNode>(Op0)
2803 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2804 SDValue Op11 = Op1.getOperand(1);
2805 if (Op11.getOpcode() == ISD::Constant
2806 || Op11.getOpcode() == ISD::TargetConstant) {
2807 // (add (SPUindirect <arg>, <const>), <const>) ->
2808 // (SPUindirect <arg>, <const + const>)
2809 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2810 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2811 SDValue combinedConst =
2812 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2813 Op0.getValueType());
2815 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2816 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2817 DEBUG(cerr << "With: (SPUindirect <arg>, "
2818 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2820 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2821 Op1.getOperand(0), combinedConst);
2822 }
2823 }
2824 break;
2825 }
2826 case ISD::SIGN_EXTEND:
2827 case ISD::ZERO_EXTEND:
2828 case ISD::ANY_EXTEND: {
2829 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2830 N->getValueType(0) == Op0.getValueType()) {
2831 // (any_extend (SPUextract_elt0 <arg>)) ->
2832 // (SPUextract_elt0 <arg>)
2833 // Types must match, however...
2834 DEBUG(cerr << "Replace: ");
2835 DEBUG(N->dump(&DAG));
2836 DEBUG(cerr << "\nWith: ");
2837 DEBUG(Op0.getNode()->dump(&DAG));
2838 DEBUG(cerr << "\n");
2844 case SPUISD::IndirectAddr: {
2845 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2846 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2847 if (CN->getZExtValue() == 0) {
2848 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2849 // (SPUaform <addr>, 0)
2851 DEBUG(cerr << "Replace: ");
2852 DEBUG(N->dump(&DAG));
2853 DEBUG(cerr << "\nWith: ");
2854 DEBUG(Op0.getNode()->dump(&DAG));
2855 DEBUG(cerr << "\n");
2862 case SPUISD::SHLQUAD_L_BITS:
2863 case SPUISD::SHLQUAD_L_BYTES:
2864 case SPUISD::VEC_SHL:
2865 case SPUISD::VEC_SRL:
2866 case SPUISD::VEC_SRA:
2867 case SPUISD::ROTQUAD_RZ_BYTES:
2868 case SPUISD::ROTQUAD_RZ_BITS: {
2869 SDValue Op1 = N->getOperand(1);
2871 if (isa<ConstantSDNode>(Op1)) {
2872 // Kill degenerate vector shifts:
2873 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2875 if (CN->getZExtValue() == 0) {
2876 Result = Op0;
2877 }
2878 }
2879 break;
2880 }
2881 case SPUISD::PROMOTE_SCALAR: {
2882 switch (Op0.getOpcode()) {
2883 default:
2884 break;
2885 case ISD::ANY_EXTEND:
2886 case ISD::ZERO_EXTEND:
2887 case ISD::SIGN_EXTEND: {
2888 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2889 // <arg>
2890 // but only if the SPUpromote_scalar and <arg> types match.
2891 SDValue Op00 = Op0.getOperand(0);
2892 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2893 SDValue Op000 = Op00.getOperand(0);
2894 if (Op000.getValueType() == N->getValueType(0)) {
2895 Result = Op000;
2896 }
2897 }
2898 break;
2899 }
2900 case SPUISD::EXTRACT_ELT0: {
2901 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2902 // <arg>
2903 Result = Op0.getOperand(0);
2904 break;
2905 }
2906 }
2907 break;
2908 }
2909 }
2910 // Otherwise, return unchanged.
2912 if (Result.getNode()) {
2913 DEBUG(cerr << "\nReplace.SPU: ");
2914 DEBUG(N->dump(&DAG));
2915 DEBUG(cerr << "\nWith: ");
2916 DEBUG(Result.getNode()->dump(&DAG));
2917 DEBUG(cerr << "\n");
2924 //===----------------------------------------------------------------------===//
2925 // Inline Assembly Support
2926 //===----------------------------------------------------------------------===//
2928 /// getConstraintType - Given a constraint letter, return the type of
2929 /// constraint it is for this target.
2930 SPUTargetLowering::ConstraintType
2931 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2932 if (ConstraintLetter.size() == 1) {
2933 switch (ConstraintLetter[0]) {
2934 case 'b':
2935 case 'r':
2936 case 'f':
2937 case 'd':
2938 case 'v':
2939 case 'y':
2940 return C_RegisterClass;
2941 }
2942 }
2943 return TargetLowering::getConstraintType(ConstraintLetter);
2946 std::pair<unsigned, const TargetRegisterClass*>
2947 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2948 MVT VT) const
2949 {
2950 if (Constraint.size() == 1) {
2951 // GCC RS6000 Constraint Letters
2952 switch (Constraint[0]) {
2953 case 'b':
2954 case 'r':
2955 if (VT == MVT::i64)
2956 return std::make_pair(0U, SPU::R64CRegisterClass);
2957 return std::make_pair(0U, SPU::R32CRegisterClass);
2958 case 'f':
2959 if (VT == MVT::f32)
2960 return std::make_pair(0U, SPU::R32FPRegisterClass);
2961 else if (VT == MVT::f64)
2962 return std::make_pair(0U, SPU::R64FPRegisterClass);
2963 break;
2964 case 'v':
2965 return std::make_pair(0U, SPU::GPRCRegisterClass);
2966 }
2967 }
2969 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2972 //! Compute used/known bits for a SPU operand
2973 void
2974 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2975 const APInt &Mask,
2976 APInt &KnownZero,
2977 APInt &KnownOne,
2978 const SelectionDAG &DAG,
2979 unsigned Depth) const {
2981 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2984 switch (Op.getOpcode()) {
2985 default:
2986 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2987 break;
2996 case SPUISD::PROMOTE_SCALAR: {
2997 SDValue Op0 = Op.getOperand(0);
2998 MVT Op0VT = Op0.getValueType();
2999 unsigned Op0VTBits = Op0VT.getSizeInBits();
3000 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3001 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3002 KnownOne |= APInt(Op0VTBits, InMask, false);
3003 break;
3004 }
3006 case SPUISD::LDRESULT:
3007 case SPUISD::EXTRACT_ELT0:
3008 case SPUISD::EXTRACT_ELT0_CHAINED: {
3009 MVT OpVT = Op.getValueType();
3010 unsigned OpVTBits = OpVT.getSizeInBits();
3011 uint64_t InMask = OpVT.getIntegerVTBitMask();
3012 KnownZero |= APInt(OpVTBits, ~InMask, false);
3013 KnownOne |= APInt(OpVTBits, InMask, false);
3014 break;
3015 }
3018 case EXTRACT_I1_ZEXT:
3019 case EXTRACT_I1_SEXT:
3020 case EXTRACT_I8_ZEXT:
3021 case EXTRACT_I8_SEXT:
3026 case SPUISD::SHLQUAD_L_BITS:
3027 case SPUISD::SHLQUAD_L_BYTES:
3028 case SPUISD::VEC_SHL:
3029 case SPUISD::VEC_SRL:
3030 case SPUISD::VEC_SRA:
3031 case SPUISD::VEC_ROTL:
3032 case SPUISD::VEC_ROTR:
3033 case SPUISD::ROTQUAD_RZ_BYTES:
3034 case SPUISD::ROTQUAD_RZ_BITS:
3035 case SPUISD::ROTBYTES_RIGHT_S:
3036 case SPUISD::ROTBYTES_LEFT:
3037 case SPUISD::ROTBYTES_LEFT_CHAINED:
3038 case SPUISD::SELECT_MASK:
3040 case SPUISD::FPInterp:
3041 case SPUISD::FPRecipEst:
3042 case SPUISD::SEXT32TO64:
3043 break;
3044 }
3045 }
3047 // LowerAsmOperandForConstraint
3048 void
3049 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3050 char ConstraintLetter,
3051 bool hasMemory,
3052 std::vector<SDValue> &Ops,
3053 SelectionDAG &DAG) const {
3054 // Default, for the time being, to the base class handler
3055 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3056 Ops, DAG);
3057 }
3059 /// isLegalAddressImmediate - Return true if the integer value can be used
3060 /// as the offset of the target addressing mode.
3061 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3062 const Type *Ty) const {
3063 // SPU's addresses are 256K, i.e., they fit in an 18-bit immediate:
3064 return (V > -(1 << 18) && V < (1 << 18) - 1);
3067 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3068 return false;
3069 }
3071 bool
3072 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3073 // The SPU target isn't yet aware of offsets.
3074 return false;
3075 }