1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // Initialize libcalls:
134 setLibcallName(RTLIB::MUL_I64, "__muldi3");
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
141 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
142 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
143 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
144 setTruncStoreAction(MVT::i8, MVT::i8, Custom);
145 setTruncStoreAction(MVT::i16, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
147 setTruncStoreAction(MVT::i64, MVT::i8, Custom);
148 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
150 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
151 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
152 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
154 // SPU constant load actions are custom lowered:
155 setOperationAction(ISD::Constant, MVT::i64, Custom);
156 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
157 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
159 // SPU's loads and stores have to be custom lowered:
160 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
162 MVT VT = (MVT::SimpleValueType)sctype;
164 setOperationAction(ISD::LOAD, VT, Custom);
165 setOperationAction(ISD::STORE, VT, Custom);
168 // Custom lower BRCOND for i8 to "promote" the result to i16
169 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
171 // Expand the jumptable branches
172 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
173 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
175 // Custom lower SELECT_CC for most cases, but expand by default
176 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
178 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
179 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
180 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
182 // SPU has no intrinsics for these particular operations:
183 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
185 // PowerPC has no SREM/UREM instructions
186 setOperationAction(ISD::SREM, MVT::i32, Expand);
187 setOperationAction(ISD::UREM, MVT::i32, Expand);
188 setOperationAction(ISD::SREM, MVT::i64, Expand);
189 setOperationAction(ISD::UREM, MVT::i64, Expand);
191 // We don't support sin/cos/sqrt/fmod
192 setOperationAction(ISD::FSIN , MVT::f64, Expand);
193 setOperationAction(ISD::FCOS , MVT::f64, Expand);
194 setOperationAction(ISD::FREM , MVT::f64, Expand);
195 setOperationAction(ISD::FSIN , MVT::f32, Expand);
196 setOperationAction(ISD::FCOS , MVT::f32, Expand);
197 setOperationAction(ISD::FREM , MVT::f32, Expand);
199 // If we're enabling GP optimizations, use hardware square root
200 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
201 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
203 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
206 // SPU can do rotate right and left, so legalize it... but customize for i8
207 // because instructions don't exist.
209 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
211 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
212 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
213 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
215 setOperationAction(ISD::ROTL, MVT::i32, Legal);
216 setOperationAction(ISD::ROTL, MVT::i16, Legal);
217 setOperationAction(ISD::ROTL, MVT::i8, Custom);
219 // SPU has no native version of shift left/right for i8
220 setOperationAction(ISD::SHL, MVT::i8, Custom);
221 setOperationAction(ISD::SRL, MVT::i8, Custom);
222 setOperationAction(ISD::SRA, MVT::i8, Custom);
224 // SPU needs custom lowering for shift left/right for i64
225 setOperationAction(ISD::SHL, MVT::i64, Custom);
226 setOperationAction(ISD::SRL, MVT::i64, Custom);
227 setOperationAction(ISD::SRA, MVT::i64, Custom);
229 // Custom lower i8, i32 and i64 multiplications
230 setOperationAction(ISD::MUL, MVT::i8, Custom);
231 setOperationAction(ISD::MUL, MVT::i32, Custom);
232 setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
234 // SMUL_LOHI, UMUL_LOHI
235 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
236 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
237 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
238 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
240 // Need to custom handle (some) common i8, i64 math ops
241 setOperationAction(ISD::ADD, MVT::i64, Custom);
242 setOperationAction(ISD::SUB, MVT::i8, Custom);
243 setOperationAction(ISD::SUB, MVT::i64, Custom);
245 // SPU does not have BSWAP. It does have i32 support CTLZ.
246 // CTPOP has to be custom lowered.
247 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
248 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
250 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
251 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
252 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
253 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
255 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
256 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
258 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
260 // SPU has a version of select that implements (a&~c)|(b&c), just like
261 // select ought to work:
262 setOperationAction(ISD::SELECT, MVT::i8, Legal);
263 setOperationAction(ISD::SELECT, MVT::i16, Legal);
264 setOperationAction(ISD::SELECT, MVT::i32, Legal);
265 setOperationAction(ISD::SELECT, MVT::i64, Expand);
267 setOperationAction(ISD::SETCC, MVT::i8, Legal);
268 setOperationAction(ISD::SETCC, MVT::i16, Legal);
269 setOperationAction(ISD::SETCC, MVT::i32, Legal);
270 setOperationAction(ISD::SETCC, MVT::i64, Expand);
272 // Zero extension and sign extension for i64 have to be
274 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
275 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
276 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
278 // SPU has a legal FP -> signed INT instruction
279 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
280 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
281 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
284 // FDIV on SPU requires custom lowering
285 setOperationAction(ISD::FDIV, MVT::f32, Custom);
286 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
288 // SPU has [U|S]INT_TO_FP
289 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
290 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
291 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
292 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
293 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
294 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
295 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
296 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
298 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
299 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
300 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
301 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
303 // We cannot sextinreg(i1). Expand to shifts.
304 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
306 // Support label based line numbers.
307 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
308 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
310 // We want to legalize GlobalAddress and ConstantPool nodes into the
311 // appropriate instructions to materialize the address.
312 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
314 MVT VT = (MVT::SimpleValueType)sctype;
316 setOperationAction(ISD::GlobalAddress, VT, Custom);
317 setOperationAction(ISD::ConstantPool, VT, Custom);
318 setOperationAction(ISD::JumpTable, VT, Custom);
321 // RET must be custom lowered, to meet ABI requirements
322 setOperationAction(ISD::RET, MVT::Other, Custom);
324 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
325 setOperationAction(ISD::VASTART , MVT::Other, Custom);
327 // Use the default implementation.
328 setOperationAction(ISD::VAARG , MVT::Other, Expand);
329 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
330 setOperationAction(ISD::VAEND , MVT::Other, Expand);
331 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
332 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
333 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
334 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
336 // Cell SPU has instructions for converting between i64 and fp.
337 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
338 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
340 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
341 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
343 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
344 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
346 // First set operation action for all vector types to expand. Then we
347 // will selectively turn on ones that can be effectively codegen'd.
348 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
349 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
350 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
351 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
352 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
353 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
355 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
356 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
357 MVT VT = (MVT::SimpleValueType)i;
359 // add/sub are legal for all supported vector VT's.
360 setOperationAction(ISD::ADD , VT, Legal);
361 setOperationAction(ISD::SUB , VT, Legal);
362 // mul has to be custom lowered.
363 setOperationAction(ISD::MUL , VT, Custom);
365 setOperationAction(ISD::AND , VT, Legal);
366 setOperationAction(ISD::OR , VT, Legal);
367 setOperationAction(ISD::XOR , VT, Legal);
368 setOperationAction(ISD::LOAD , VT, Legal);
369 setOperationAction(ISD::SELECT, VT, Legal);
370 setOperationAction(ISD::STORE, VT, Legal);
372 // These operations need to be expanded:
373 setOperationAction(ISD::SDIV, VT, Expand);
374 setOperationAction(ISD::SREM, VT, Expand);
375 setOperationAction(ISD::UDIV, VT, Expand);
376 setOperationAction(ISD::UREM, VT, Expand);
377 setOperationAction(ISD::FDIV, VT, Custom);
379 // Custom lower build_vector, constant pool spills, insert and
380 // extract vector elements:
381 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
382 setOperationAction(ISD::ConstantPool, VT, Custom);
383 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
384 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
385 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
386 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
389 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
390 setOperationAction(ISD::AND, MVT::v16i8, Custom);
391 setOperationAction(ISD::OR, MVT::v16i8, Custom);
392 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
393 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
395 setShiftAmountType(MVT::i32);
396 setBooleanContents(ZeroOrOneBooleanContent);
398 setStackPointerRegisterToSaveRestore(SPU::R1);
400 // We have target-specific dag combine patterns for the following nodes:
401 setTargetDAGCombine(ISD::ADD);
402 setTargetDAGCombine(ISD::ZERO_EXTEND);
403 setTargetDAGCombine(ISD::SIGN_EXTEND);
404 setTargetDAGCombine(ISD::ANY_EXTEND);
406 computeRegisterProperties();
408 // Set other properties:
409 setSchedulingPreference(SchedulingForLatency);
413 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
415 if (node_names.empty()) {
416 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
417 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
418 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
419 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
420 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
421 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
422 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
423 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
424 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
425 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
426 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
427 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
428 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
429 node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
430 = "SPUISD::VEC2PREFSLOT_CHAINED";
431 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
432 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
433 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
434 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
435 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
436 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
437 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
438 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
439 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
440 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
441 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
442 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
443 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
444 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
445 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
446 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
447 "SPUISD::ROTQUAD_RZ_BYTES";
448 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
449 "SPUISD::ROTQUAD_RZ_BITS";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
451 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
452 "SPUISD::ROTBYTES_LEFT_CHAINED";
453 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
454 "SPUISD::ROTBYTES_LEFT_BITS";
455 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
456 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
457 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
458 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
459 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
460 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
461 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
462 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
463 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
466 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
468 return ((i != node_names.end()) ? i->second : 0);
471 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
472 MVT VT = Op.getValueType();
473 return (VT.isInteger() ? VT : MVT(MVT::i32));
476 //===----------------------------------------------------------------------===//
477 // Calling convention code:
478 //===----------------------------------------------------------------------===//
480 #include "SPUGenCallingConv.inc"
482 //===----------------------------------------------------------------------===//
483 // LowerOperation implementation
484 //===----------------------------------------------------------------------===//
486 /// Aligned load common code for CellSPU
488 \param[in] Op The SelectionDAG load or store operand
489 \param[in] DAG The selection DAG
490 \param[in] ST CellSPU subtarget information structure
491 \param[in,out] alignment Caller initializes this to the load or store node's
492 value from getAlignment(), may be updated while generating the aligned load
493 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
494 offset (divisible by 16, modulo 16 == 0)
495 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
496 offset of the preferred slot (modulo 16 != 0)
497 \param[in,out] VT Caller initializes this value type to the the load or store
498 node's loaded or stored value type; may be updated if an i1-extended load or
500 \param[out] was16aligned true if the base pointer had 16-byte alignment,
501 otherwise false. Can help to determine if the chunk needs to be rotated.
503 Both load and store lowering load a block of data aligned on a 16-byte
504 boundary. This is the common aligned load code shared between both.
507 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
509 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
510 MVT &VT, bool &was16aligned)
512 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
513 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
514 SDValue basePtr = LSN->getBasePtr();
515 SDValue chain = LSN->getChain();
517 if (basePtr.getOpcode() == ISD::ADD) {
518 SDValue Op1 = basePtr.getNode()->getOperand(1);
520 if (Op1.getOpcode() == ISD::Constant
521 || Op1.getOpcode() == ISD::TargetConstant) {
522 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
524 alignOffs = (int) CN->getZExtValue();
525 prefSlotOffs = (int) (alignOffs & 0xf);
527 // Adjust the rotation amount to ensure that the final result ends up in
528 // the preferred slot:
529 prefSlotOffs -= vtm->prefslot_byte;
530 basePtr = basePtr.getOperand(0);
532 // Loading from memory, can we adjust alignment?
533 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
534 SDValue APtr = basePtr.getOperand(0);
535 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
536 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
537 alignment = GSDN->getGlobal()->getAlignment();
542 prefSlotOffs = -vtm->prefslot_byte;
544 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
545 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
546 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
547 prefSlotOffs = (int) (alignOffs & 0xf);
548 prefSlotOffs -= vtm->prefslot_byte;
549 basePtr = DAG.getRegister(SPU::R1, VT);
552 prefSlotOffs = -vtm->prefslot_byte;
555 if (alignment == 16) {
556 // Realign the base pointer as a D-Form address:
557 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
558 basePtr = DAG.getNode(ISD::ADD, PtrVT,
560 DAG.getConstant((alignOffs & ~0xf), PtrVT));
563 // Emit the vector load:
565 return DAG.getLoad(MVT::v16i8, chain, basePtr,
566 LSN->getSrcValue(), LSN->getSrcValueOffset(),
567 LSN->isVolatile(), 16);
570 // Unaligned load or we're using the "large memory" model, which means that
571 // we have to be very pessimistic:
572 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
573 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
574 DAG.getConstant(0, PtrVT));
578 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
579 DAG.getConstant((alignOffs & ~0xf), PtrVT));
580 was16aligned = false;
581 return DAG.getLoad(MVT::v16i8, chain, basePtr,
582 LSN->getSrcValue(), LSN->getSrcValueOffset(),
583 LSN->isVolatile(), 16);
586 /// Custom lower loads for CellSPU
588 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
589 within a 16-byte block, we have to rotate to extract the requested element.
592 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
593 LoadSDNode *LN = cast<LoadSDNode>(Op);
594 SDValue the_chain = LN->getChain();
595 MVT VT = LN->getMemoryVT();
596 MVT OpVT = Op.getNode()->getValueType(0);
597 ISD::LoadExtType ExtType = LN->getExtensionType();
598 unsigned alignment = LN->getAlignment();
601 switch (LN->getAddressingMode()) {
602 case ISD::UNINDEXED: {
606 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
608 if (result.getNode() == 0)
611 the_chain = result.getValue(1);
612 // Rotate the chunk if necessary
615 if (rotamt != 0 || !was16aligned) {
616 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
621 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
623 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
624 LoadSDNode *LN1 = cast<LoadSDNode>(result);
625 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
626 DAG.getConstant(rotamt, PtrVT));
629 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
630 the_chain = result.getValue(1);
633 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
635 MVT vecVT = MVT::v16i8;
637 // Convert the loaded v16i8 vector to the appropriate vector type
638 // specified by the operand:
641 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
643 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
646 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
647 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
648 result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
649 the_chain = result.getValue(1);
651 // Handle the sign and zero-extending loads for i1 and i8:
654 if (ExtType == ISD::SEXTLOAD) {
655 NewOpC = (OpVT == MVT::i1
656 ? SPUISD::EXTRACT_I1_SEXT
657 : SPUISD::EXTRACT_I8_SEXT);
659 assert(ExtType == ISD::ZEXTLOAD);
660 NewOpC = (OpVT == MVT::i1
661 ? SPUISD::EXTRACT_I1_ZEXT
662 : SPUISD::EXTRACT_I8_ZEXT);
665 result = DAG.getNode(NewOpC, OpVT, result);
668 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
669 SDValue retops[2] = {
674 result = DAG.getNode(SPUISD::LDRESULT, retvts,
675 retops, sizeof(retops) / sizeof(retops[0]));
682 case ISD::LAST_INDEXED_MODE:
683 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
685 cerr << (unsigned) LN->getAddressingMode() << "\n";
693 /// Custom lower stores for CellSPU
695 All CellSPU stores are aligned to 16-byte boundaries, so for elements
696 within a 16-byte block, we have to generate a shuffle to insert the
697 requested element into its place, then store the resulting block.
700 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
701 StoreSDNode *SN = cast<StoreSDNode>(Op);
702 SDValue Value = SN->getValue();
703 MVT VT = Value.getValueType();
704 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
705 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
706 unsigned alignment = SN->getAlignment();
708 switch (SN->getAddressingMode()) {
709 case ISD::UNINDEXED: {
710 int chunk_offset, slot_offset;
713 // The vector type we really want to load from the 16-byte chunk.
714 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
715 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
717 SDValue alignLoadVec =
718 AlignedLoad(Op, DAG, ST, SN, alignment,
719 chunk_offset, slot_offset, VT, was16aligned);
721 if (alignLoadVec.getNode() == 0)
724 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
725 SDValue basePtr = LN->getBasePtr();
726 SDValue the_chain = alignLoadVec.getValue(1);
727 SDValue theValue = SN->getValue();
731 && (theValue.getOpcode() == ISD::AssertZext
732 || theValue.getOpcode() == ISD::AssertSext)) {
733 // Drill down and get the value for zero- and sign-extended
735 theValue = theValue.getOperand(0);
740 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
741 SDValue insertEltPtr;
743 // If the base pointer is already a D-form address, then just create
744 // a new D-form address with a slot offset and the orignal base pointer.
745 // Otherwise generate a D-form address with the slot offset relative
746 // to the stack pointer, which is always aligned.
747 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
748 DEBUG(basePtr.getNode()->dump(&DAG));
751 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
752 (basePtr.getOpcode() == ISD::ADD
753 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
754 insertEltPtr = basePtr;
756 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
759 SDValue insertEltOp =
760 DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
761 SDValue vectorizeOp =
762 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
764 result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
765 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
767 result = DAG.getStore(the_chain, result, basePtr,
768 LN->getSrcValue(), LN->getSrcValueOffset(),
769 LN->isVolatile(), LN->getAlignment());
771 #if 0 && defined(NDEBUG)
772 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
773 const SDValue ¤tRoot = DAG.getRoot();
776 cerr << "------- CellSPU:LowerStore result:\n";
779 DAG.setRoot(currentRoot);
790 case ISD::LAST_INDEXED_MODE:
791 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
793 cerr << (unsigned) SN->getAddressingMode() << "\n";
801 /// Generate the address of a constant pool entry.
803 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
804 MVT PtrVT = Op.getValueType();
805 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
806 Constant *C = CP->getConstVal();
807 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
808 SDValue Zero = DAG.getConstant(0, PtrVT);
809 const TargetMachine &TM = DAG.getTarget();
811 if (TM.getRelocationModel() == Reloc::Static) {
812 if (!ST->usingLargeMem()) {
813 // Just return the SDValue with the constant pool address in it.
814 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
816 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
817 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
818 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
823 "LowerConstantPool: Relocation model other than static"
829 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
830 MVT PtrVT = Op.getValueType();
831 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
832 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
833 SDValue Zero = DAG.getConstant(0, PtrVT);
834 const TargetMachine &TM = DAG.getTarget();
836 if (TM.getRelocationModel() == Reloc::Static) {
837 if (!ST->usingLargeMem()) {
838 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
840 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
841 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
842 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
847 "LowerJumpTable: Relocation model other than static not supported.");
852 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
853 MVT PtrVT = Op.getValueType();
854 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
855 GlobalValue *GV = GSDN->getGlobal();
856 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
857 const TargetMachine &TM = DAG.getTarget();
858 SDValue Zero = DAG.getConstant(0, PtrVT);
860 if (TM.getRelocationModel() == Reloc::Static) {
861 if (!ST->usingLargeMem()) {
862 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
864 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
865 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
866 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
869 cerr << "LowerGlobalAddress: Relocation model other than static not "
878 //! Custom lower i64 integer constants
880 This code inserts all of the necessary juggling that needs to occur to load
881 a 64-bit constant into a register.
884 LowerConstant(SDValue Op, SelectionDAG &DAG) {
885 MVT VT = Op.getValueType();
886 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
888 if (VT == MVT::i64) {
889 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
890 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
891 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
893 cerr << "LowerConstant: unhandled constant type "
903 //! Custom lower double precision floating point constants
905 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
906 MVT VT = Op.getValueType();
907 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
910 "LowerConstantFP: Node is not ConstantFPSDNode");
912 if (VT == MVT::f64) {
913 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
914 return DAG.getNode(ISD::BIT_CONVERT, VT,
915 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
921 //! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
923 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
925 SDValue Cond = Op.getOperand(1);
926 MVT CondVT = Cond.getValueType();
929 if (CondVT == MVT::i8) {
931 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
933 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
936 return SDValue(); // Unchanged
940 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
942 MachineFunction &MF = DAG.getMachineFunction();
943 MachineFrameInfo *MFI = MF.getFrameInfo();
944 MachineRegisterInfo &RegInfo = MF.getRegInfo();
945 SmallVector<SDValue, 48> ArgValues;
946 SDValue Root = Op.getOperand(0);
947 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
949 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
950 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
952 unsigned ArgOffset = SPUFrameInfo::minStackSize();
953 unsigned ArgRegIdx = 0;
954 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
956 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
958 // Add DAG nodes to load the arguments or copy them out of registers.
959 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
960 ArgNo != e; ++ArgNo) {
961 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
962 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
965 if (ArgRegIdx < NumArgRegs) {
966 const TargetRegisterClass *ArgRegClass;
968 switch (ObjectVT.getSimpleVT()) {
970 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
971 << ObjectVT.getMVTString()
976 ArgRegClass = &SPU::R8CRegClass;
979 ArgRegClass = &SPU::R16CRegClass;
982 ArgRegClass = &SPU::R32CRegClass;
985 ArgRegClass = &SPU::R64CRegClass;
988 ArgRegClass = &SPU::R32FPRegClass;
991 ArgRegClass = &SPU::R64FPRegClass;
999 ArgRegClass = &SPU::VECREGRegClass;
1003 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1004 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1005 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1008 // We need to load the argument to a virtual register if we determined
1009 // above that we ran out of physical registers of the appropriate type
1010 // or we're forced to do vararg
1011 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1012 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1013 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1014 ArgOffset += StackSlotSize;
1017 ArgValues.push_back(ArgVal);
1019 Root = ArgVal.getOperand(0);
1024 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1025 // We will spill (79-3)+1 registers to the stack
1026 SmallVector<SDValue, 79-3+1> MemOps;
1028 // Create the frame slot
1030 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1031 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1032 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1033 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1034 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1035 Root = Store.getOperand(0);
1036 MemOps.push_back(Store);
1038 // Increment address by stack slot size for the next stored argument
1039 ArgOffset += StackSlotSize;
1041 if (!MemOps.empty())
1042 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1045 ArgValues.push_back(Root);
1047 // Return the new list of results.
1048 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1052 /// isLSAAddress - Return the immediate to use if the specified
1053 /// value is representable as a LSA address.
1054 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1055 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1058 int Addr = C->getZExtValue();
1059 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1060 (Addr << 14 >> 14) != Addr)
1061 return 0; // Top 14 bits have to be sext of immediate.
1063 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1068 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1069 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1070 SDValue Chain = TheCall->getChain();
1071 SDValue Callee = TheCall->getCallee();
1072 unsigned NumOps = TheCall->getNumArgs();
1073 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1074 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1075 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1077 // Handy pointer type
1078 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1080 // Accumulate how many bytes are to be pushed on the stack, including the
1081 // linkage area, and parameter passing area. According to the SPU ABI,
1082 // we minimally need space for [LR] and [SP]
1083 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1085 // Set up a copy of the stack pointer for use loading and storing any
1086 // arguments that may not fit in the registers available for argument
1088 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1090 // Figure out which arguments are going to go in registers, and which in
1092 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1093 unsigned ArgRegIdx = 0;
1095 // Keep track of registers passing arguments
1096 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1097 // And the arguments passed on the stack
1098 SmallVector<SDValue, 8> MemOpChains;
1100 for (unsigned i = 0; i != NumOps; ++i) {
1101 SDValue Arg = TheCall->getArg(i);
1103 // PtrOff will be used to store the current argument to the stack if a
1104 // register cannot be found for it.
1105 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1106 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1108 switch (Arg.getValueType().getSimpleVT()) {
1109 default: assert(0 && "Unexpected ValueType for argument!");
1113 if (ArgRegIdx != NumArgRegs) {
1114 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1116 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1117 ArgOffset += StackSlotSize;
1122 if (ArgRegIdx != NumArgRegs) {
1123 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1125 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1126 ArgOffset += StackSlotSize;
1133 if (ArgRegIdx != NumArgRegs) {
1134 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1136 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1137 ArgOffset += StackSlotSize;
1143 // Update number of stack bytes actually used, insert a call sequence start
1144 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1145 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1148 if (!MemOpChains.empty()) {
1149 // Adjust the stack pointer for the stack arguments.
1150 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1151 &MemOpChains[0], MemOpChains.size());
1154 // Build a sequence of copy-to-reg nodes chained together with token chain
1155 // and flag operands which copy the outgoing args into the appropriate regs.
1157 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1158 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1160 InFlag = Chain.getValue(1);
1163 SmallVector<SDValue, 8> Ops;
1164 unsigned CallOpc = SPUISD::CALL;
1166 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1167 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1168 // node so that legalize doesn't hack it.
1169 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1170 GlobalValue *GV = G->getGlobal();
1171 MVT CalleeVT = Callee.getValueType();
1172 SDValue Zero = DAG.getConstant(0, PtrVT);
1173 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1175 if (!ST->usingLargeMem()) {
1176 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1177 // style calls, otherwise, external symbols are BRASL calls. This assumes
1178 // that declared/defined symbols are in the same compilation unit and can
1179 // be reached through PC-relative jumps.
1182 // This may be an unsafe assumption for JIT and really large compilation
1184 if (GV->isDeclaration()) {
1185 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1187 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1190 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1192 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1194 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1195 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1196 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1197 // If this is an absolute destination address that appears to be a legal
1198 // local store address, use the munged value.
1199 Callee = SDValue(Dest, 0);
1202 Ops.push_back(Chain);
1203 Ops.push_back(Callee);
1205 // Add argument registers to the end of the list so that they are known live
1207 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1208 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1209 RegsToPass[i].second.getValueType()));
1211 if (InFlag.getNode())
1212 Ops.push_back(InFlag);
1213 // Returns a chain and a flag for retval copy to use.
1214 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1215 &Ops[0], Ops.size());
1216 InFlag = Chain.getValue(1);
1218 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1219 DAG.getIntPtrConstant(0, true), InFlag);
1220 if (TheCall->getValueType(0) != MVT::Other)
1221 InFlag = Chain.getValue(1);
1223 SDValue ResultVals[3];
1224 unsigned NumResults = 0;
1226 // If the call has results, copy the values out of the ret val registers.
1227 switch (TheCall->getValueType(0).getSimpleVT()) {
1228 default: assert(0 && "Unexpected ret value!");
1229 case MVT::Other: break;
1231 if (TheCall->getValueType(1) == MVT::i32) {
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1234 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1235 Chain.getValue(2)).getValue(1);
1236 ResultVals[1] = Chain.getValue(0);
1239 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1240 ResultVals[0] = Chain.getValue(0);
1245 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1246 ResultVals[0] = Chain.getValue(0);
1251 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1252 InFlag).getValue(1);
1253 ResultVals[0] = Chain.getValue(0);
1261 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1262 InFlag).getValue(1);
1263 ResultVals[0] = Chain.getValue(0);
1268 // If the function returns void, just return the chain.
1269 if (NumResults == 0)
1272 // Otherwise, merge everything together with a MERGE_VALUES node.
1273 ResultVals[NumResults++] = Chain;
1274 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1275 return Res.getValue(Op.getResNo());
1279 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1280 SmallVector<CCValAssign, 16> RVLocs;
1281 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1282 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1283 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1284 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1286 // If this is the first return lowered for this function, add the regs to the
1287 // liveout set for the function.
1288 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1289 for (unsigned i = 0; i != RVLocs.size(); ++i)
1290 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1293 SDValue Chain = Op.getOperand(0);
1296 // Copy the result values into the output registers.
1297 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1298 CCValAssign &VA = RVLocs[i];
1299 assert(VA.isRegLoc() && "Can only return in registers!");
1300 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1301 Flag = Chain.getValue(1);
1305 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1307 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1311 //===----------------------------------------------------------------------===//
1312 // Vector related lowering:
1313 //===----------------------------------------------------------------------===//
1315 static ConstantSDNode *
1316 getVecImm(SDNode *N) {
1317 SDValue OpVal(0, 0);
1319 // Check to see if this buildvec has a single non-undef value in its elements.
1320 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1321 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1322 if (OpVal.getNode() == 0)
1323 OpVal = N->getOperand(i);
1324 else if (OpVal != N->getOperand(i))
1328 if (OpVal.getNode() != 0) {
1329 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1334 return 0; // All UNDEF: use implicit def.; not Constant node
1337 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1338 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1340 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1342 if (ConstantSDNode *CN = getVecImm(N)) {
1343 uint64_t Value = CN->getZExtValue();
1344 if (ValueType == MVT::i64) {
1345 uint64_t UValue = CN->getZExtValue();
1346 uint32_t upper = uint32_t(UValue >> 32);
1347 uint32_t lower = uint32_t(UValue);
1350 Value = Value >> 32;
1352 if (Value <= 0x3ffff)
1353 return DAG.getTargetConstant(Value, ValueType);
1359 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1360 /// and the value fits into a signed 16-bit constant, and if so, return the
1362 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1364 if (ConstantSDNode *CN = getVecImm(N)) {
1365 int64_t Value = CN->getSExtValue();
1366 if (ValueType == MVT::i64) {
1367 uint64_t UValue = CN->getZExtValue();
1368 uint32_t upper = uint32_t(UValue >> 32);
1369 uint32_t lower = uint32_t(UValue);
1372 Value = Value >> 32;
1374 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1375 return DAG.getTargetConstant(Value, ValueType);
1382 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1383 /// and the value fits into a signed 10-bit constant, and if so, return the
1385 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1387 if (ConstantSDNode *CN = getVecImm(N)) {
1388 int64_t Value = CN->getSExtValue();
1389 if (ValueType == MVT::i64) {
1390 uint64_t UValue = CN->getZExtValue();
1391 uint32_t upper = uint32_t(UValue >> 32);
1392 uint32_t lower = uint32_t(UValue);
1395 Value = Value >> 32;
1397 if (isS10Constant(Value))
1398 return DAG.getTargetConstant(Value, ValueType);
1404 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1405 /// and the value fits into a signed 8-bit constant, and if so, return the
1408 /// @note: The incoming vector is v16i8 because that's the only way we can load
1409 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1411 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1413 if (ConstantSDNode *CN = getVecImm(N)) {
1414 int Value = (int) CN->getZExtValue();
1415 if (ValueType == MVT::i16
1416 && Value <= 0xffff /* truncated from uint64_t */
1417 && ((short) Value >> 8) == ((short) Value & 0xff))
1418 return DAG.getTargetConstant(Value & 0xff, ValueType);
1419 else if (ValueType == MVT::i8
1420 && (Value & 0xff) == Value)
1421 return DAG.getTargetConstant(Value, ValueType);
1427 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1428 /// and the value fits into a signed 16-bit constant, and if so, return the
1430 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1432 if (ConstantSDNode *CN = getVecImm(N)) {
1433 uint64_t Value = CN->getZExtValue();
1434 if ((ValueType == MVT::i32
1435 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1436 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1437 return DAG.getTargetConstant(Value >> 16, ValueType);
1443 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1444 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1445 if (ConstantSDNode *CN = getVecImm(N)) {
1446 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1452 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1453 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1454 if (ConstantSDNode *CN = getVecImm(N)) {
1455 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1461 // If this is a vector of constants or undefs, get the bits. A bit in
1462 // UndefBits is set if the corresponding element of the vector is an
1463 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1464 // zero. Return true if this is not an array of constants, false if it is.
1466 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1467 uint64_t UndefBits[2]) {
1468 // Start with zero'd results.
1469 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1471 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1472 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1473 SDValue OpVal = BV->getOperand(i);
1475 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1476 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1478 uint64_t EltBits = 0;
1479 if (OpVal.getOpcode() == ISD::UNDEF) {
1480 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1481 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1483 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1484 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1485 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1486 const APFloat &apf = CN->getValueAPF();
1487 EltBits = (CN->getValueType(0) == MVT::f32
1488 ? FloatToBits(apf.convertToFloat())
1489 : DoubleToBits(apf.convertToDouble()));
1491 // Nonconstant element.
1495 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1498 //printf("%llx %llx %llx %llx\n",
1499 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1503 /// If this is a splat (repetition) of a value across the whole vector, return
1504 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1505 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1506 /// SplatSize = 1 byte.
1507 static bool isConstantSplat(const uint64_t Bits128[2],
1508 const uint64_t Undef128[2],
1510 uint64_t &SplatBits, uint64_t &SplatUndef,
1512 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1513 // the same as the lower 64-bits, ignoring undefs.
1514 uint64_t Bits64 = Bits128[0] | Bits128[1];
1515 uint64_t Undef64 = Undef128[0] & Undef128[1];
1516 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1517 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1518 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1519 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1521 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1522 if (MinSplatBits < 64) {
1524 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1526 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1527 if (MinSplatBits < 32) {
1529 // If the top 16-bits are different than the lower 16-bits, ignoring
1530 // undefs, we have an i32 splat.
1531 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1532 if (MinSplatBits < 16) {
1533 // If the top 8-bits are different than the lower 8-bits, ignoring
1534 // undefs, we have an i16 splat.
1535 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1536 == ((Bits16 >> 8) & ~Undef16)) {
1537 // Otherwise, we have an 8-bit splat.
1538 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1539 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1545 SplatUndef = Undef16;
1552 SplatUndef = Undef32;
1558 SplatBits = Bits128[0];
1559 SplatUndef = Undef128[0];
1565 return false; // Can't be a splat if two pieces don't match.
1568 // If this is a case we can't handle, return null and let the default
1569 // expansion code take care of it. If we CAN select this case, and if it
1570 // selects to a single instruction, return Op. Otherwise, if we can codegen
1571 // this case more efficiently than a constant pool load, lower it to the
1572 // sequence of ops that should be used.
1573 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1574 MVT VT = Op.getValueType();
1575 // If this is a vector of constants or undefs, get the bits. A bit in
1576 // UndefBits is set if the corresponding element of the vector is an
1577 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1579 uint64_t VectorBits[2];
1580 uint64_t UndefBits[2];
1581 uint64_t SplatBits, SplatUndef;
1583 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1584 || !isConstantSplat(VectorBits, UndefBits,
1585 VT.getVectorElementType().getSizeInBits(),
1586 SplatBits, SplatUndef, SplatSize))
1587 return SDValue(); // Not a constant vector, not a splat.
1589 switch (VT.getSimpleVT()) {
1592 uint32_t Value32 = SplatBits;
1593 assert(SplatSize == 4
1594 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596 SDValue T = DAG.getConstant(Value32, MVT::i32);
1597 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1598 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1602 uint64_t f64val = SplatBits;
1603 assert(SplatSize == 8
1604 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1605 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606 SDValue T = DAG.getConstant(f64val, MVT::i64);
1607 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1608 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1612 // 8-bit constants have to be expanded to 16-bits
1613 unsigned short Value16 = SplatBits | (SplatBits << 8);
1615 for (int i = 0; i < 8; ++i)
1616 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1617 return DAG.getNode(ISD::BIT_CONVERT, VT,
1618 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1621 unsigned short Value16;
1623 Value16 = (unsigned short) (SplatBits & 0xffff);
1625 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1626 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1628 for (int i = 0; i < 8; ++i) Ops[i] = T;
1629 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1632 unsigned int Value = SplatBits;
1633 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1634 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1637 uint64_t val = SplatBits;
1638 uint32_t upper = uint32_t(val >> 32);
1639 uint32_t lower = uint32_t(val);
1641 if (upper == lower) {
1642 // Magic constant that can be matched by IL, ILA, et. al.
1643 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1644 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1648 SmallVector<SDValue, 16> ShufBytes;
1650 bool upper_special, lower_special;
1652 // NOTE: This code creates common-case shuffle masks that can be easily
1653 // detected as common expressions. It is not attempting to create highly
1654 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1656 // Detect if the upper or lower half is a special shuffle mask pattern:
1657 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1658 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1660 // Create lower vector if not a special pattern
1661 if (!lower_special) {
1662 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1663 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1664 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1665 LO32C, LO32C, LO32C, LO32C));
1668 // Create upper vector if not a special pattern
1669 if (!upper_special) {
1670 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1671 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1672 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1673 HI32C, HI32C, HI32C, HI32C));
1676 // If either upper or lower are special, then the two input operands are
1677 // the same (basically, one of them is a "don't care")
1682 if (lower_special && upper_special) {
1683 // Unhappy situation... both upper and lower are special, so punt with
1684 // a target constant:
1685 SDValue Zero = DAG.getConstant(0, MVT::i32);
1686 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1690 for (int i = 0; i < 4; ++i) {
1692 for (int j = 0; j < 4; ++j) {
1694 bool process_upper, process_lower;
1696 process_upper = (upper_special && (i & 1) == 0);
1697 process_lower = (lower_special && (i & 1) == 1);
1699 if (process_upper || process_lower) {
1700 if ((process_upper && upper == 0)
1701 || (process_lower && lower == 0))
1703 else if ((process_upper && upper == 0xffffffff)
1704 || (process_lower && lower == 0xffffffff))
1706 else if ((process_upper && upper == 0x80000000)
1707 || (process_lower && lower == 0x80000000))
1708 val |= (j == 0 ? 0xe0 : 0x80);
1710 val |= i * 4 + j + ((i & 1) * 16);
1713 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1716 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1717 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1718 &ShufBytes[0], ShufBytes.size()));
1726 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1727 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1728 /// permutation vector, V3, is monotonically increasing with one "exception"
1729 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1730 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1731 /// In either case, the net result is going to eventually invoke SHUFB to
1732 /// permute/shuffle the bytes from V1 and V2.
1734 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1735 /// generate the control word for byte/halfword/word insertion. This takes care
1736 /// of a single element move from V2 into V1.
1738 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
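/// A concrete instance of the "(0, 1, _, 3)" case above (illustrative,
/// assuming v4i32 operands): the mask (0, 1, 6, 3) keeps V1's elements 0,
/// 1 and 3 in place and pulls a single element (V2's element 2) into slot
/// 2, so it can be lowered with one SHUFFLE_MASK + SHUFB pair instead of
/// a constant-pool shuffle mask.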
1739 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1740 SDValue V1 = Op.getOperand(0);
1741 SDValue V2 = Op.getOperand(1);
1742 SDValue PermMask = Op.getOperand(2);
1744 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1746 // If we have a single element being moved from V2 into V1, this can be handled
1747 // using the C*[DX] compute mask instructions, but the vector elements have
1748 // to be monotonically increasing with one exception element.
1749 MVT EltVT = V1.getValueType().getVectorElementType();
1750 unsigned EltsFromV2 = 0;
1752 unsigned V2EltIdx0 = 0;
1753 unsigned CurrElt = 0;
1754 bool monotonic = true;
1755 if (EltVT == MVT::i8)
1757 else if (EltVT == MVT::i16)
1759 else if (EltVT == MVT::i32)
1762 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1764 for (unsigned i = 0, e = PermMask.getNumOperands();
1765 EltsFromV2 <= 1 && monotonic && i != e;
1768 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1771 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1773 if (SrcElt >= V2EltIdx0) {
1775 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1776 } else if (CurrElt != SrcElt) {
1783 if (EltsFromV2 == 1 && monotonic) {
1784 // Compute mask and shuffle
1785 MachineFunction &MF = DAG.getMachineFunction();
1786 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1787 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1788 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1789 // Initialize temporary register to 0
1790 SDValue InitTempReg =
1791 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1792 // Copy register's contents as index in SHUFFLE_MASK:
1793 SDValue ShufMaskOp =
1794 DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
1795 DAG.getTargetConstant(V2Elt, MVT::i32),
1796 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1797 // Use shuffle mask in SHUFB synthetic instruction:
1798 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1800 // Convert the SHUFFLE_VECTOR mask's input element units to byte units:
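// Illustrative example (assuming v4i32 operands, so BytesPerElement is 4):
// a mask element of 5 expands to byte indices 20, 21, 22 and 23 in the
// v16i8 SHUFB control vector built below.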
1802 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1804 SmallVector<SDValue, 16> ResultMask;
1805 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1807 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1810 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1812 for (unsigned j = 0; j < BytesPerElement; ++j) {
1813 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1818 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1819 &ResultMask[0], ResultMask.size());
1820 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1824 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1825 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1827 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1828 // For a constant, build the appropriate constant vector, which will
1829 // eventually simplify to a vector register load.
1831 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1832 SmallVector<SDValue, 16> ConstVecValues;
1836 // Create a constant vector:
1837 switch (Op.getValueType().getSimpleVT()) {
1838 default: assert(0 && "Unexpected constant value type in "
1839 "LowerSCALAR_TO_VECTOR");
1840 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1841 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1842 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1843 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1844 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1845 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1848 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1849 for (size_t j = 0; j < n_copies; ++j)
1850 ConstVecValues.push_back(CValue);
1852 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1853 &ConstVecValues[0], ConstVecValues.size());
1855 // Otherwise, copy the value from one register to another:
1856 switch (Op0.getValueType().getSimpleVT()) {
1857 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1864 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1871 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1872 switch (Op.getValueType().getSimpleVT()) {
1874 cerr << "CellSPU: Unknown vector multiplication, got "
1875 << Op.getValueType().getMVTString()
1881 SDValue rA = Op.getOperand(0);
1882 SDValue rB = Op.getOperand(1);
1883 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1884 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1885 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1886 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1888 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1892 // Multiply two v8i16 vectors (pipeline friendly version):
1893 // a) multiply lower halves, mask off upper 16 bits of the 32-bit products
1894 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1895 // c) Use SELB to select upper and lower halves from the intermediate results
1897 // NOTE: We really want to move the SELECT_MASK earlier to actually get the
1898 // dual issue. This code does manage that, even if it is a little convoluted.
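// Sketch of the lane layout this relies on (assuming SPU's big-endian
// word lanes): mpy forms 32-bit products of the low halfword of each
// word, whose low 16 bits already sit in the odd halfword slots; mpyhh
// forms products of the high halfwords, which the VEC_SHL by 16 moves
// into the even slots; the SELECT_MASK value 0xcccc then picks the even
// halfwords from the shifted mpyhh result and the odd halfwords from the
// mpy result.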
1901 MachineFunction &MF = DAG.getMachineFunction();
1902 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1903 SDValue Chain = Op.getOperand(0);
1904 SDValue rA = Op.getOperand(0);
1905 SDValue rB = Op.getOperand(1);
1906 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1907 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1910 DAG.getCopyToReg(Chain, FSMBIreg,
1911 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1912 DAG.getConstant(0xcccc, MVT::i16)));
1915 DAG.getCopyToReg(FSMBOp, HiProdReg,
1916 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1918 SDValue HHProd_v4i32 =
1919 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1920 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1922 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1923 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1924 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1925 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1927 DAG.getConstant(16, MVT::i16))),
1928 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1931 // This M00sE is N@stI! (apologies to Monty Python)
1933 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1934 // is to break it all apart, sign extend, and reassemble the various
1935 // intermediate products.
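// Very roughly (a sketch of the intent, not of every node built below):
// 16-bit mpy operations supply the byte products whose low-order bytes
// already land in the right lanes, arithmetic shifts by 8 and 16 bits
// expose the remaining byte lanes so they can be multiplied the same way,
// and the partial results are merged back together with SELECT_MASK/SELB
// and a final OR of the high and low halves.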
1937 SDValue rA = Op.getOperand(0);
1938 SDValue rB = Op.getOperand(1);
1939 SDValue c8 = DAG.getConstant(8, MVT::i32);
1940 SDValue c16 = DAG.getConstant(16, MVT::i32);
1943 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1944 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1945 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1947 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1949 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1952 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1953 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1955 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1956 DAG.getConstant(0x2222, MVT::i16));
1958 SDValue LoProdParts =
1959 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1960 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1961 LLProd, LHProd, FSMBmask));
1963 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1966 DAG.getNode(ISD::AND, MVT::v4i32,
1968 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1969 LoProdMask, LoProdMask,
1970 LoProdMask, LoProdMask));
1973 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1974 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1977 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1978 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1981 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1982 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1986 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1987 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1988 DAG.getNode(SPUISD::VEC_SRA,
1989 MVT::v4i32, rAH, c8)),
1990 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1991 DAG.getNode(SPUISD::VEC_SRA,
1992 MVT::v4i32, rBH, c8)));
1995 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1997 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2001 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2003 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2004 DAG.getNode(ISD::OR, MVT::v4i32,
2012 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2013 MachineFunction &MF = DAG.getMachineFunction();
2014 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2016 SDValue A = Op.getOperand(0);
2017 SDValue B = Op.getOperand(1);
2018 MVT VT = Op.getValueType();
2020 unsigned VRegBR, VRegC;
2022 if (VT == MVT::f32) {
2023 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2024 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2026 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2027 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2029 // TODO: make sure we're feeding FPInterp the right arguments
2030 // Right now: fi B, frest(B)
2033 // (Floating Interpolate (FP Reciprocal Estimate B))
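// In effect this performs one Newton-Raphson style refinement of the
// estimated reciprocal (a sketch of the math, ignoring rounding):
//   BRcpl   ~= 1/B                      (frest estimate refined by fi)
//   AxBRcpl  = A * BRcpl                (first quotient estimate)
//   result   = AxBRcpl + BRcpl * (A - B * AxBRcpl)
// i.e. the residual A - B*AxBRcpl corrects the initial estimate.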
2035 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2036 DAG.getNode(SPUISD::FPInterp, VT, B,
2037 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2039 // Computes A * BRcpl and stores in a temporary register
2041 DAG.getCopyToReg(BRcpl, VRegC,
2042 DAG.getNode(ISD::FMUL, VT, A,
2043 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2044 // What does the Chain variable do? It's magic!
2045 // TODO: set Chain = Op(0).getEntryNode()
2047 return DAG.getNode(ISD::FADD, VT,
2048 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2049 DAG.getNode(ISD::FMUL, VT,
2050 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2051 DAG.getNode(ISD::FSUB, VT, A,
2052 DAG.getNode(ISD::FMUL, VT, B,
2053 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2056 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2057 MVT VT = Op.getValueType();
2058 SDValue N = Op.getOperand(0);
2059 SDValue Elt = Op.getOperand(1);
2062 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2063 // Constant argument:
2064 int EltNo = (int) C->getZExtValue();
2067 if (VT == MVT::i8 && EltNo >= 16)
2068 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2069 else if (VT == MVT::i16 && EltNo >= 8)
2070 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2071 else if (VT == MVT::i32 && EltNo >= 4)
2072 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2073 else if (VT == MVT::i64 && EltNo >= 2)
2074 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2076 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2077 // i32 and i64: Element 0 is the preferred slot
2078 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2081 // Need to generate shuffle mask and extract:
2082 int prefslot_begin = -1, prefslot_end = -1;
2083 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2085 switch (VT.getSimpleVT()) {
2087 assert(false && "Invalid value type!");
2089 prefslot_begin = prefslot_end = 3;
2093 prefslot_begin = 2; prefslot_end = 3;
2098 prefslot_begin = 0; prefslot_end = 3;
2103 prefslot_begin = 0; prefslot_end = 7;
2108 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2109 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2111 unsigned int ShufBytes[16];
2112 for (int i = 0; i < 16; ++i) {
2113 // Zero-fill the upper part of the preferred slot; don't care about the rest.
2115 unsigned int mask_val;
2116 if (i <= prefslot_end) {
2118 ((i < prefslot_begin)
2120 : elt_byte + (i - prefslot_begin));
2122 ShufBytes[i] = mask_val;
2124 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2127 SDValue ShufMask[4];
2128 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2129 unsigned bidx = i * 4;
2130 unsigned int bits = ((ShufBytes[bidx] << 24) |
2131 (ShufBytes[bidx+1] << 16) |
2132 (ShufBytes[bidx+2] << 8) |
2134 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2137 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2139 sizeof(ShufMask) / sizeof(ShufMask[0]));
2141 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2142 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2143 N, N, ShufMaskVec));
2145 // Variable index: Rotate the requested element into slot 0, then replicate
2146 // slot 0 across the vector
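// For instance (illustrative, assuming a v4i32 source): extracting
// element Elt shifts the quadword left by Elt * 4 bytes so the requested
// word lands in byte slots 0..3, and the replicate mask below then copies
// those bytes into every word before VEC2PREFSLOT reads the result.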
2147 MVT VecVT = N.getValueType();
2148 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2149 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2153 // Make life easier by making sure the index is zero-extended to i32
2154 if (Elt.getValueType() != MVT::i32)
2155 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2157 // Scale the index to a bit/byte shift quantity
2159 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2160 unsigned scaleShift = scaleFactor.logBase2();
2163 if (scaleShift > 0) {
2164 // Scale the shift factor:
2165 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2166 DAG.getConstant(scaleShift, MVT::i32));
2169 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2171 // Replicate the bytes starting at byte 0 across the entire vector (for
2172 // consistency with the notion of a unified register set)
2175 switch (VT.getSimpleVT()) {
2177 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2181 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2182 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2187 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2188 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2194 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2195 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2201 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2202 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2203 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2204 loFactor, hiFactor);
2209 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2210 DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
2216 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2217 SDValue VecOp = Op.getOperand(0);
2218 SDValue ValOp = Op.getOperand(1);
2219 SDValue IdxOp = Op.getOperand(2);
2220 MVT VT = Op.getValueType();
2222 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2223 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2225 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2226 // Use $2 because it's always 16-byte aligned and it's available:
2227 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
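// Sketch of the trick being used here: SHUFFLE_MASK of ($2 + byte offset)
// is eventually selected as one of the c[bhwd]d insertion-mask
// instructions, which only look at the low 4 bits of the address (hence
// any 16-byte aligned base register works), and the SHUFB then splices
// ValOp into VecOp at the requested element slot.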
2230 DAG.getNode(SPUISD::SHUFB, VT,
2231 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2233 DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
2234 DAG.getNode(ISD::ADD, PtrVT,
2236 DAG.getConstant(CN->getZExtValue(),
2242 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2244 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2246 assert(Op.getValueType() == MVT::i8);
2249 assert(0 && "Unhandled i8 math operator");
2253 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2255 SDValue N1 = Op.getOperand(1);
2256 N0 = (N0.getOpcode() != ISD::Constant
2257 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2258 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2260 N1 = (N1.getOpcode() != ISD::Constant
2261 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2262 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2264 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2265 DAG.getNode(Opc, MVT::i16, N0, N1));
2269 SDValue N1 = Op.getOperand(1);
2271 N0 = (N0.getOpcode() != ISD::Constant
2272 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2273 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2275 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2278 N1 = (N1.getOpcode() != ISD::Constant
2279 ? DAG.getNode(N1Opc, MVT::i32, N1)
2280 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2283 DAG.getNode(ISD::OR, MVT::i16, N0,
2284 DAG.getNode(ISD::SHL, MVT::i16,
2285 N0, DAG.getConstant(8, MVT::i32)));
2286 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2287 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2291 SDValue N1 = Op.getOperand(1);
2293 N0 = (N0.getOpcode() != ISD::Constant
2294 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2295 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2297 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2300 N1 = (N1.getOpcode() != ISD::Constant
2301 ? DAG.getNode(N1Opc, MVT::i16, N1)
2302 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2304 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2305 DAG.getNode(Opc, MVT::i16, N0, N1));
2308 SDValue N1 = Op.getOperand(1);
2310 N0 = (N0.getOpcode() != ISD::Constant
2311 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2312 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2314 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2317 N1 = (N1.getOpcode() != ISD::Constant
2318 ? DAG.getNode(N1Opc, MVT::i16, N1)
2319 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2321 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2322 DAG.getNode(Opc, MVT::i16, N0, N1));
2325 SDValue N1 = Op.getOperand(1);
2327 N0 = (N0.getOpcode() != ISD::Constant
2328 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2329 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2331 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2332 N1 = (N1.getOpcode() != ISD::Constant
2333 ? DAG.getNode(N1Opc, MVT::i16, N1)
2334 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2336 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2337 DAG.getNode(Opc, MVT::i16, N0, N1));
2345 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2347 MVT VT = Op.getValueType();
2348 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2350 SDValue Op0 = Op.getOperand(0);
2353 case ISD::ZERO_EXTEND:
2354 case ISD::SIGN_EXTEND:
2355 case ISD::ANY_EXTEND: {
2356 MVT Op0VT = Op0.getValueType();
2357 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2359 assert(Op0VT == MVT::i32
2360 && "CellSPU: Zero/sign extending something other than i32");
2362 DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2364 SDValue PromoteScalar =
2365 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2367 if (Opc != ISD::SIGN_EXTEND) {
2368 // Use a shuffle to zero extend the i32 to i64 directly:
2370 DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2371 DAG.getConstant(0x80808080, MVT::i32),
2372 DAG.getConstant(0x00010203, MVT::i32),
2373 DAG.getConstant(0x80808080, MVT::i32),
2374 DAG.getConstant(0x08090a0b, MVT::i32));
2375 SDValue zextShuffle =
2376 DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2377 PromoteScalar, PromoteScalar, shufMask);
2379 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2380 DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
2382 // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
2383 // right and propagate the sign bit) instruction.
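// Sketch of the workaround used below: one copy of the promoted value is
// rotated by 4 bytes so the 32-bit payload occupies the low word of the
// i64 slot, a second copy is arithmetically shifted so a word of pure
// sign bits is available, and SELB with the 0xf0f0 SELECT_MASK stitches
// the sign word and the payload word together before VEC2PREFSLOT
// extracts the i64 result.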
2385 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
2386 PromoteScalar, DAG.getConstant(4, MVT::i32));
2388 DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
2389 PromoteScalar, DAG.getConstant(32, MVT::i32));
2391 DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
2392 DAG.getConstant(0xf0f0, MVT::i16));
2393 SDValue CombineQuad =
2394 DAG.getNode(SPUISD::SELB, Op0VecVT,
2395 SignQuad, RotQuad, SelMask);
2397 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2398 DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
2403 // Turn operands into vectors to satisfy type checking (shufb works on vectors)
2406 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2408 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2409 SmallVector<SDValue, 16> ShufBytes;
2411 // Create the shuffle mask for "rotating" the carry up one register slot
2412 // once the carry is generated.
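// (Assuming the usual shufb control-byte encoding: 0x04050607 copies word
// 1 of the carry vector into word 0 and 0x80808080 produces a word of
// zeroes, so the carry generated by the low word of each 64-bit lane ends
// up in the high-word position that ADD_EXTENDED consumes, while the
// other words contribute no carry.)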
2413 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2414 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2415 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2416 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2419 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2420 SDValue ShiftedCarry =
2421 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2423 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2424 &ShufBytes[0], ShufBytes.size()));
2426 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2427 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2428 Op0, Op1, ShiftedCarry));
2432 // Turn operands into vectors to satisfy type checking (shufb works on vectors)
2435 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2437 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2438 SmallVector<SDValue, 16> ShufBytes;
2440 // Create the shuffle mask for "rotating" the borrow up one register slot
2441 // once the borrow is generated.
2442 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2443 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2444 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2445 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2448 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2449 SDValue ShiftedBorrow =
2450 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2451 BorrowGen, BorrowGen,
2452 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2453 &ShufBytes[0], ShufBytes.size()));
2455 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2456 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2457 Op0, Op1, ShiftedBorrow));
2461 SDValue ShiftAmt = Op.getOperand(1);
2462 MVT ShiftAmtVT = ShiftAmt.getValueType();
2463 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2465 DAG.getNode(SPUISD::SELB, VecVT,
2467 DAG.getConstant(0, VecVT),
2468 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2469 DAG.getConstant(0xff00ULL, MVT::i16)));
2470 SDValue ShiftAmtBytes =
2471 DAG.getNode(ISD::SRL, ShiftAmtVT,
2473 DAG.getConstant(3, ShiftAmtVT));
2474 SDValue ShiftAmtBits =
2475 DAG.getNode(ISD::AND, ShiftAmtVT,
2477 DAG.getConstant(7, ShiftAmtVT));
2479 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2480 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2481 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2482 MaskLower, ShiftAmtBytes),
2487 MVT VT = Op.getValueType();
2488 SDValue ShiftAmt = Op.getOperand(1);
2489 MVT ShiftAmtVT = ShiftAmt.getValueType();
2490 SDValue ShiftAmtBytes =
2491 DAG.getNode(ISD::SRL, ShiftAmtVT,
2493 DAG.getConstant(3, ShiftAmtVT));
2494 SDValue ShiftAmtBits =
2495 DAG.getNode(ISD::AND, ShiftAmtVT,
2497 DAG.getConstant(7, ShiftAmtVT));
2499 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2500 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2501 Op0, ShiftAmtBytes),
2506 // Promote Op0 to vector
2508 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2509 SDValue ShiftAmt = Op.getOperand(1);
2510 MVT ShiftVT = ShiftAmt.getValueType();
2512 // Negate variable shift amounts
2513 if (!isa<ConstantSDNode>(ShiftAmt)) {
2514 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2515 DAG.getConstant(0, ShiftVT), ShiftAmt);
2518 SDValue UpperHalfSign =
2519 DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
2520 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2521 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2522 Op0, DAG.getConstant(31, MVT::i32))));
2523 SDValue UpperHalfSignMask =
2524 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2525 SDValue UpperLowerMask =
2526 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2527 DAG.getConstant(0xff00, MVT::i16));
2528 SDValue UpperLowerSelect =
2529 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2530 UpperHalfSignMask, Op0, UpperLowerMask);
2531 SDValue RotateLeftBytes =
2532 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2533 UpperLowerSelect, ShiftAmt);
2534 SDValue RotateLeftBits =
2535 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2536 RotateLeftBytes, ShiftAmt);
2538 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2546 //! Lower byte immediate operations for v16i8 vectors:
2548 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2551 MVT VT = Op.getValueType();
2553 ConstVec = Op.getOperand(0);
2554 Arg = Op.getOperand(1);
2555 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2556 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2557 ConstVec = ConstVec.getOperand(0);
2559 ConstVec = Op.getOperand(1);
2560 Arg = Op.getOperand(0);
2561 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2562 ConstVec = ConstVec.getOperand(0);
2567 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2568 uint64_t VectorBits[2];
2569 uint64_t UndefBits[2];
2570 uint64_t SplatBits, SplatUndef;
2573 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2574 && isConstantSplat(VectorBits, UndefBits,
2575 VT.getVectorElementType().getSizeInBits(),
2576 SplatBits, SplatUndef, SplatSize)) {
2578 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2579 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2581 // Turn the BUILD_VECTOR into a set of target constants:
2582 for (size_t i = 0; i < tcVecSize; ++i)
2585 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2586 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2589 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2590 // lowered. Return the operation rather than a null SDValue.
2594 //! Lower i32 multiplication
2595 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2597 switch (VT.getSimpleVT()) {
2599 cerr << "CellSPU: Unknown LowerMUL value type, got "
2600 << Op.getValueType().getMVTString()
2606 SDValue rA = Op.getOperand(0);
2607 SDValue rB = Op.getOperand(1);
2609 return DAG.getNode(ISD::ADD, MVT::i32,
2610 DAG.getNode(ISD::ADD, MVT::i32,
2611 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2612 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2613 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2620 //! Custom lowering for CTPOP (count population)
2622 Custom lowering code that counts the number of ones in the input
2623 operand. SPU has such an instruction, but it counts the number of
2624 ones per byte, which then have to be accumulated.
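For example (a sketch of the i32 path below): cntb leaves four per-byte
counts in the word, and the accumulation folds them with two shift/add
steps, sum1 = x + (x >> 16) followed by sum2 = sum1 + (sum1 >> 8), before
masking with 0xff to keep the final 0..32 population count.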
2626 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2627 MVT VT = Op.getValueType();
2628 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2630 switch (VT.getSimpleVT()) {
2632 assert(false && "Invalid value type!");
2634 SDValue N = Op.getOperand(0);
2635 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2637 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2638 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2640 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2644 MachineFunction &MF = DAG.getMachineFunction();
2645 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2647 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2649 SDValue N = Op.getOperand(0);
2650 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2651 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2652 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2654 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2655 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2657 // CNTB_result becomes the chain to which the virtual register
2658 // CNTB_reg becomes associated:
2659 SDValue CNTB_result =
2660 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2662 SDValue CNTB_rescopy =
2663 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2665 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2667 return DAG.getNode(ISD::AND, MVT::i16,
2668 DAG.getNode(ISD::ADD, MVT::i16,
2669 DAG.getNode(ISD::SRL, MVT::i16,
2676 MachineFunction &MF = DAG.getMachineFunction();
2677 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2679 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2680 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2682 SDValue N = Op.getOperand(0);
2683 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2684 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2685 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2686 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2688 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2689 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2691 // CNTB_result becomes the chain to which all of the virtual registers
2692 // CNTB_reg, SUM1_reg become associated:
2693 SDValue CNTB_result =
2694 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2696 SDValue CNTB_rescopy =
2697 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2700 DAG.getNode(ISD::SRL, MVT::i32,
2701 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2704 DAG.getNode(ISD::ADD, MVT::i32,
2705 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2707 SDValue Sum1_rescopy =
2708 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2711 DAG.getNode(ISD::SRL, MVT::i32,
2712 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2715 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2716 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2718 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2728 //! Lower ISD::SELECT_CC
2730 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the SELB instruction.
2733 \note Need to revisit this in the future: if the code path through the true
2734 and false value computations is longer than the latency of a branch (6
2735 cycles), then it would be more advantageous to branch and insert a new basic
2736 block and branch on the condition. However, this code does not make that
2737 assumption, given the simplistic uses seen so far.
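The lowering itself is just a re-association (illustrative):
  (select_cc lhs, rhs, trueval, falseval, cc)
    -> (SPUISD::SELB trueval, falseval, (setcc lhs, rhs, cc))
so the comparison result drives a bitwise select instead of a branch.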
2740 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2741 MVT VT = Op.getValueType();
2742 SDValue lhs = Op.getOperand(0);
2743 SDValue rhs = Op.getOperand(1);
2744 SDValue trueval = Op.getOperand(2);
2745 SDValue falseval = Op.getOperand(3);
2746 SDValue condition = Op.getOperand(4);
2748 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2749 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2750 // with another "cannot select select_cc" assert:
2752 SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2753 return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2756 //! Custom (target-specific) lowering entry point
2758 This is where LLVM's DAG selection process calls to do target-specific lowering of nodes.
2762 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2764 unsigned Opc = (unsigned) Op.getOpcode();
2765 MVT VT = Op.getValueType();
2769 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2770 cerr << "Op.getOpcode() = " << Opc << "\n";
2771 cerr << "*Op.getNode():\n";
2772 Op.getNode()->dump();
2778 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2780 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2781 case ISD::ConstantPool:
2782 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2783 case ISD::GlobalAddress:
2784 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2785 case ISD::JumpTable:
2786 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2788 return LowerConstant(Op, DAG);
2789 case ISD::ConstantFP:
2790 return LowerConstantFP(Op, DAG);
2792 return LowerBRCOND(Op, DAG);
2793 case ISD::FORMAL_ARGUMENTS:
2794 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2796 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2798 return LowerRET(Op, DAG, getTargetMachine());
2801 // i8, i64 math ops:
2802 case ISD::ZERO_EXTEND:
2803 case ISD::SIGN_EXTEND:
2804 case ISD::ANY_EXTEND:
2813 return LowerI8Math(Op, DAG, Opc);
2814 else if (VT == MVT::i64)
2815 return LowerI64Math(Op, DAG, Opc);
2819 // Vector-related lowering.
2820 case ISD::BUILD_VECTOR:
2821 return LowerBUILD_VECTOR(Op, DAG);
2822 case ISD::SCALAR_TO_VECTOR:
2823 return LowerSCALAR_TO_VECTOR(Op, DAG);
2824 case ISD::VECTOR_SHUFFLE:
2825 return LowerVECTOR_SHUFFLE(Op, DAG);
2826 case ISD::EXTRACT_VECTOR_ELT:
2827 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2828 case ISD::INSERT_VECTOR_ELT:
2829 return LowerINSERT_VECTOR_ELT(Op, DAG);
2831 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2835 return LowerByteImmed(Op, DAG);
2837 // Vector and i8 multiply:
2840 return LowerVectorMUL(Op, DAG);
2841 else if (VT == MVT::i8)
2842 return LowerI8Math(Op, DAG, Opc);
2844 return LowerMUL(Op, DAG, VT, Opc);
2847 if (VT == MVT::f32 || VT == MVT::v4f32)
2848 return LowerFDIVf32(Op, DAG);
2850 // This is probably a libcall
2851 else if (Op.getValueType() == MVT::f64)
2852 return LowerFDIVf64(Op, DAG);
2855 assert(0 && "Calling FDIV on unsupported MVT");
2858 return LowerCTPOP(Op, DAG);
2860 case ISD::SELECT_CC:
2861 return LowerSELECT_CC(Op, DAG);
2867 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2870 unsigned Opc = (unsigned) N->getOpcode();
2871 MVT OpVT = N->getValueType(0);
2875 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2876 cerr << "Op.getOpcode() = " << Opc << "\n";
2877 cerr << "*Op.getNode():\n";
2885 /* Otherwise, return unchanged */
2889 //===----------------------------------------------------------------------===//
2890 // Target Optimization Hooks
2891 //===----------------------------------------------------------------------===//
2894 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2897 TargetMachine &TM = getTargetMachine();
2899 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2900 SelectionDAG &DAG = DCI.DAG;
2901 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2902 SDValue Result; // Initially, NULL result
2904 switch (N->getOpcode()) {
2907 SDValue Op1 = N->getOperand(1);
2909 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2910 SDValue Op01 = Op0.getOperand(1);
2911 if (Op01.getOpcode() == ISD::Constant
2912 || Op01.getOpcode() == ISD::TargetConstant) {
2913 // (add (SPUindirect <arg>, <const>), <const>) ->
2914 // (SPUindirect <arg>, <const + const>)
2915 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2916 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2917 SDValue combinedConst =
2918 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2919 Op0.getValueType());
2921 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2922 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2923 DEBUG(cerr << "With: (SPUindirect <arg>, "
2924 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2925 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2926 Op0.getOperand(0), combinedConst);
2928 } else if (isa<ConstantSDNode>(Op0)
2929 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2930 SDValue Op11 = Op1.getOperand(1);
2931 if (Op11.getOpcode() == ISD::Constant
2932 || Op11.getOpcode() == ISD::TargetConstant) {
2933 // (add <const>, (SPUindirect <arg>, <const>)) ->
2934 // (SPUindirect <arg>, <const + const>)
2935 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2936 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2937 SDValue combinedConst =
2938 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2939 Op0.getValueType());
2941 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2942 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2943 DEBUG(cerr << "With: (SPUindirect <arg>, "
2944 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2946 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2947 Op1.getOperand(0), combinedConst);
2952 case ISD::SIGN_EXTEND:
2953 case ISD::ZERO_EXTEND:
2954 case ISD::ANY_EXTEND: {
2955 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT &&
2956 N->getValueType(0) == Op0.getValueType()) {
2957 // (any_extend (SPUextract_elt0 <arg>)) ->
2958 // (SPUextract_elt0 <arg>)
2959 // Types must match, however...
2960 DEBUG(cerr << "Replace: ");
2961 DEBUG(N->dump(&DAG));
2962 DEBUG(cerr << "\nWith: ");
2963 DEBUG(Op0.getNode()->dump(&DAG));
2964 DEBUG(cerr << "\n");
2970 case SPUISD::IndirectAddr: {
2971 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2972 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2973 if (CN->getZExtValue() == 0) {
2974 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2975 // (SPUaform <addr>, 0)
2977 DEBUG(cerr << "Replace: ");
2978 DEBUG(N->dump(&DAG));
2979 DEBUG(cerr << "\nWith: ");
2980 DEBUG(Op0.getNode()->dump(&DAG));
2981 DEBUG(cerr << "\n");
2988 case SPUISD::SHLQUAD_L_BITS:
2989 case SPUISD::SHLQUAD_L_BYTES:
2990 case SPUISD::VEC_SHL:
2991 case SPUISD::VEC_SRL:
2992 case SPUISD::VEC_SRA:
2993 case SPUISD::ROTQUAD_RZ_BYTES:
2994 case SPUISD::ROTQUAD_RZ_BITS: {
2995 SDValue Op1 = N->getOperand(1);
2997 if (isa<ConstantSDNode>(Op1)) {
2998 // Kill degenerate vector shifts:
2999 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
3001 if (CN->getZExtValue() == 0) {
3007 case SPUISD::PROMOTE_SCALAR: {
3008 switch (Op0.getOpcode()) {
3011 case ISD::ANY_EXTEND:
3012 case ISD::ZERO_EXTEND:
3013 case ISD::SIGN_EXTEND: {
3014 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
3016 // but only if the SPUpromote_scalar and <arg> types match.
3017 SDValue Op00 = Op0.getOperand(0);
3018 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3019 SDValue Op000 = Op00.getOperand(0);
3020 if (Op000.getValueType() == N->getValueType(0)) {
3026 case SPUISD::VEC2PREFSLOT: {
3027 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
3029 Result = Op0.getOperand(0);
3036 // Otherwise, return unchanged.
3038 if (Result.getNode()) {
3039 DEBUG(cerr << "\nReplace.SPU: ");
3040 DEBUG(N->dump(&DAG));
3041 DEBUG(cerr << "\nWith: ");
3042 DEBUG(Result.getNode()->dump(&DAG));
3043 DEBUG(cerr << "\n");
3050 //===----------------------------------------------------------------------===//
3051 // Inline Assembly Support
3052 //===----------------------------------------------------------------------===//
3054 /// getConstraintType - Given a constraint letter, return the type of
3055 /// constraint it is for this target.
3056 SPUTargetLowering::ConstraintType
3057 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3058 if (ConstraintLetter.size() == 1) {
3059 switch (ConstraintLetter[0]) {
3066 return C_RegisterClass;
3069 return TargetLowering::getConstraintType(ConstraintLetter);
3072 std::pair<unsigned, const TargetRegisterClass*>
3073 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3076 if (Constraint.size() == 1) {
3077 // GCC constraint letters (modeled on the RS6000/PowerPC backend)
3078 switch (Constraint[0]) {
3082 return std::make_pair(0U, SPU::R64CRegisterClass);
3083 return std::make_pair(0U, SPU::R32CRegisterClass);
3086 return std::make_pair(0U, SPU::R32FPRegisterClass);
3087 else if (VT == MVT::f64)
3088 return std::make_pair(0U, SPU::R64FPRegisterClass);
3091 return std::make_pair(0U, SPU::GPRCRegisterClass);
3095 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3098 //! Compute used/known bits for a SPU operand
3100 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3104 const SelectionDAG &DAG,
3105 unsigned Depth ) const {
3107 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3110 switch (Op.getOpcode()) {
3112 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3122 case SPUISD::PROMOTE_SCALAR: {
3123 SDValue Op0 = Op.getOperand(0);
3124 MVT Op0VT = Op0.getValueType();
3125 unsigned Op0VTBits = Op0VT.getSizeInBits();
3126 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3127 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3128 KnownOne |= APInt(Op0VTBits, InMask, false);
3132 case SPUISD::LDRESULT:
3133 case SPUISD::VEC2PREFSLOT:
3134 case SPUISD::VEC2PREFSLOT_CHAINED: {
3135 MVT OpVT = Op.getValueType();
3136 unsigned OpVTBits = OpVT.getSizeInBits();
3137 uint64_t InMask = OpVT.getIntegerVTBitMask();
3138 KnownZero |= APInt(OpVTBits, ~InMask, false);
3139 KnownOne |= APInt(OpVTBits, InMask, false);
3144 case EXTRACT_I1_ZEXT:
3145 case EXTRACT_I1_SEXT:
3146 case EXTRACT_I8_ZEXT:
3147 case EXTRACT_I8_SEXT:
3152 case SPUISD::SHLQUAD_L_BITS:
3153 case SPUISD::SHLQUAD_L_BYTES:
3154 case SPUISD::VEC_SHL:
3155 case SPUISD::VEC_SRL:
3156 case SPUISD::VEC_SRA:
3157 case SPUISD::VEC_ROTL:
3158 case SPUISD::VEC_ROTR:
3159 case SPUISD::ROTQUAD_RZ_BYTES:
3160 case SPUISD::ROTQUAD_RZ_BITS:
3161 case SPUISD::ROTBYTES_LEFT:
3162 case SPUISD::ROTBYTES_LEFT_CHAINED:
3163 case SPUISD::SELECT_MASK:
3165 case SPUISD::FPInterp:
3166 case SPUISD::FPRecipEst:
3167 case SPUISD::SEXT32TO64:
3172 // LowerAsmOperandForConstraint
3174 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3175 char ConstraintLetter,
3177 std::vector<SDValue> &Ops,
3178 SelectionDAG &DAG) const {
3179 // Default, for the time being, to the base class handler
3180 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3184 /// isLegalAddressImmediate - Return true if the integer value can be used
3185 /// as the offset of the target addressing mode.
3186 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3187 const Type *Ty) const {
3188 // SPU's local store is 256K, so valid address immediates lie within +/-256K:
3189 return (V > -(1 << 18) && V < (1 << 18) - 1);
3192 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3197 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3198 // The SPU target isn't yet aware of offsets.