//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
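
  // Note: the "preferred slot" is the leftmost word of a 128-bit SPU
  // register, the slot that scalar instructions read and write.
  // prefslot_byte gives the byte offset of each scalar type within that
  // slot: an i8 sits in byte 3, an i16 in bytes 2-3, and 32-bit or larger
  // values start at byte 0.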

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }
  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }

  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Division by a power of two is cheap on SPU (it lowers to shifts).
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // Initialize libcalls:
  setLibcallName(RTLIB::MUL_I64, "__muldi3");

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom lower BRCOND for i8 to "promote" the result to i16
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  // the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Expand);   // libcall

  // SMUL_LOHI, UMUL_LOHI
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // Custom lower truncates
  setOperationAction(ISD::TRUNCATE, MVT::i8,  Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label-based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // Register the SPU's vector types, then selectively mark each vector
  // operation Legal, Custom, or Expand depending on what can be effectively
  // codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND   , VT, Legal);
    setOperationAction(ISD::OR    , VT, Legal);
    setOperationAction(ISD::XOR   , VT, Legal);
    setOperationAction(ISD::LOAD  , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set other properties:
  setSchedulingPreference(SchedulingForLatency);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (a multiple of 16, i.e., modulo 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot within the 16-byte block
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store is encountered
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

 Both load and store lowering load a block of data aligned on a 16-byte
 boundary. This is the common aligned load code shared between both.
 */
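// A note on the SPU addressing forms referenced below: A-form is an absolute
// (immediate) address, D-form is register + immediate offset, and X-form is
// register + register. The lowering code picks the cheapest form the memory
// model allows.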
static SDValue
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
{
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDValue Ops[8];

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDValue result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, InVT,
                  was16aligned);

    if (result.getNode() == 0)
      return result;

    the_chain = result.getValue(1);
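
    // AlignedLoad returned the whole 16-byte chunk; rotamt is the byte
    // rotation needed to bring the addressed element into the preferred
    // slot (a negative amount wraps around the quadword).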
    // Rotate the chunk if necessary
    if (rotamt < 0)
      rotamt += 16;
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      Ops[0] = result;
      if (was16aligned) {
        Ops[1] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
    }

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    chunk_offset &= 0xf;

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
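
    // shufb merges the two quadwords: the control word from SHUFFLE_MASK
    // selects the scalar's bytes from vectorizeOp and every other byte from
    // the originally loaded chunk.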
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                                     insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::i64) {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
  }

  return SDValue();
}

//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  MVT CondNVT;

  if (CondVT == MVT::i8) {
    CondNVT = MVT::i16;
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
    return SDValue(); // Unchanged
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
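// (The local store is small enough that an absolute destination must fit in
// an 18-bit, 4-byte-aligned immediate; the returned node holds the immediate
// as a word address.)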
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;   // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;
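
  // Every argument consumes one 16-byte stack slot when passed in memory,
  // regardless of its type; the case groups below differ only in the value
  // types they match.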
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v2i64:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value and the value fits into a signed 16-bit constant, and if so, return
/// the constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
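    // Elements pack big-endian within each uint64_t half: the lowest-numbered
    // element of a half lands in the most significant slot.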

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx  %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
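  // Bits from the two halves are OR'd together, while undef masks are AND'd:
  // a defined bit in either half supplies the value, and a bit is treated as
  // undef only if it is undef in both halves.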
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // Check that the top 16-bits are the same as the lower 16-bits,
          // ignoring undefs; if so, try for an even smaller splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // Check that the top 8-bits are the same as the lower 8-bits,
              // ignoring undefs.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // We have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default: assert(0 && "CellSPU: Unhandled VT in LowerBUILD_VECTOR");
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
    } else {
      SDValue LO32;
      SDValue HI32;
      SmallVector<SDValue, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions. It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.
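
      // Reminder on shufb selector-byte semantics: a selector of the form
      // 10xxxxxx produces 0x00, 110xxxxx produces 0xff, and 111xxxxx produces
      // 0x80. The loop below emits 0x80/0xc0/0xe0 selector bytes to
      // synthesize those special constant bytes directly.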
      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
      lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));
      }

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));
      }

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;
      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDValue Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          val <<= 8;
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val |= 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val |= 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
          } else
            val |= i * 4 + j + ((i & 1) * 16);
        }

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
    }
  }
  }

  return SDValue();
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
///
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT VecVT = V1.getValueType();
  MVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  unsigned V0Elt = 0;
  bool monotonic = true;
  bool rotate = true;

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
    if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
      unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

      if (monotonic) {
        if (SrcElt >= V2EltIdx0) {
          if (1 >= (++EltsFromV2)) {
            V2Elt = (V2EltIdx0 - SrcElt) << 2;
          }
        } else if (CurrElt != SrcElt) {
          monotonic = false;
        }

        ++CurrElt;
      }

      if (rotate) {
        if (PrevElt > 0 && SrcElt < MaxElts) {
          if ((PrevElt == SrcElt - 1)
              || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
            PrevElt = SrcElt;
            if (SrcElt == 0)
              V0Elt = i;
          } else {
            rotate = false;
          }
        } else if (PrevElt == 0) {
          // First time through, need to keep track of previous element
          PrevElt = SrcElt;
        } else {
          // This isn't a rotation, takes elements from vector 2
          rotate = false;
        }
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else if (rotate) {
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

      for (unsigned j = 0; j < BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
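
// Example of the two fast paths above (illustrative): for a v4i32 shuffle,
// the mask (0, 1, 6, 3) is monotonic with a single element taken from V2
// (source element 6, i.e. V2's element 2), so it maps onto a compute-mask
// (C*D) insertion. The mask (1, 2, 3, 0) walks the elements in order and
// wraps around, so it is lowered to a left rotate by one element (4 bytes).
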
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                   // Op0 = the scalar

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    MVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}

static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getValueType().getSimpleVT()) {
  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << Op.getValueType().getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
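
  // The v4i32 lowering below builds each 32-bit product from halfword
  // partial products: with a = aH*2^16 + aL and b = bH*2^16 + bL,
  //   a*b mod 2^32 = ((aH*bL + bH*aL) << 16) + aL*bL.
  // MPYH computes (xH*yL) << 16 and MPYU computes the unsigned aL*bL, so
  // two MPYHs, one MPYU and two adds produce the full 32-bit result.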
  case MVT::v4i32: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }
  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate
  //    results
  //
  // NOTE: We really want to move the SELECT_MASK to earlier to actually get
  // the dual-issue. This code does manage to do this, even if it's a little
  // on the wacky side.
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    SDValue Chain = Op.getOperand(0);
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    SDValue FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i16)));
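
    // SELECT_MASK is selected as FSMBI, which expands the 16-bit immediate
    // into a byte mask (one immediate bit per result byte). 0xcccc marks one
    // halfword of every word, so the SELB below can interleave the
    // even-element products (from MPYHH, shifted into place) with the
    // odd-element products (from MPY) in a single operation.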
    SDValue HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDValue HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }
  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue c8 = DAG.getConstant(8, MVT::i32);
    SDValue c16 = DAG.getConstant(16, MVT::i32);

    SDValue LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDValue LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i16));

    SDValue LoProdParts =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getNode(SPUISD::SELB, MVT::v8i16,
                              LLProd, LHProd, FSMBmask));

    SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDValue LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  LoProdParts,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDValue rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDValue rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDValue HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDValue HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rBH, c8)));

    SDValue HHProd =
      DAG.getNode(SPUISD::SELB, MVT::v8i16,
                  HLProd,
                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                  FSMBmask);

    SDValue HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }
  }

  return SDValue();
}

static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);
  MVT VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDValue BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDValue AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()
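
  // The expression below performs one Newton-Raphson-style refinement of the
  // estimate: with r ~= 1/B (frest refined by fi) and C = A*r, the result is
  // C + r*(A - B*C). The correction term r*(A - B*C) cancels the estimate's
  // error, roughly doubling the effective precision.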
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}

static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                      &ShufMask[0],
                                      sizeof(ShufMask) / sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then
    // replicate slot 0 across the vector
    MVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
      abort();
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
      abort();
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
                              factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
                              loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(SPUISD::SHUFB, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}

static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));

  return result;
}

static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i32)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i32, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i32));
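
    // Duplicating the byte into both halves of an i16 below makes the 16-bit
    // rotate behave exactly like an 8-bit rotate: whichever bits rotate out
    // of the low byte rotate back in from the copy in the high byte.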
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::SIGN_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}

static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
  MVT VT = Op.getValueType();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);

  switch (Opc) {
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");

    DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");

    SDValue PromoteScalar =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);

    if (Opc != ISD::SIGN_EXTEND) {
      // Use a shuffle to zero extend the i32 to i64 directly:
      SDValue shufMask =
        DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
                    DAG.getConstant(0x80808080, MVT::i32),
                    DAG.getConstant(0x00010203, MVT::i32),
                    DAG.getConstant(0x80808080, MVT::i32),
                    DAG.getConstant(0x08090a0b, MVT::i32));
      SDValue zextShuffle =
        DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                    PromoteScalar, PromoteScalar, shufMask);

      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
    } else {
      // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
      // right and propagate the sign bit) instruction.
      SDValue RotQuad =
        DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
                    PromoteScalar, DAG.getConstant(4, MVT::i32));
      SDValue SignQuad =
        DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
                    PromoteScalar, DAG.getConstant(32, MVT::i32));
      SDValue SelMask =
        DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
                    DAG.getConstant(0xf0f0, MVT::i16));
      SDValue CombineQuad =
        DAG.getNode(SPUISD::SELB, Op0VecVT,
                    SignQuad, RotQuad, SelMask);

      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
    }
  }
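
  // In effect, the sign-extend path above assembles the i64 from two views
  // of the same quadword: RotQuad shifts the original word into the low half
  // of the doubleword, SignQuad arithmetically shifts each word so it
  // becomes the replicated sign bit, and SELB merges the sign half with the
  // value half.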
  case ISD::ADD: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the carry up one register slot
    // once the carry is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

    SDValue CarryGen =
      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  CarryGen, CarryGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));
  }

  case ISD::SUB: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

    SDValue BorrowGen =
      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));
  }
  case ISD::SHL: {
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
    SDValue MaskLower =
      DAG.getNode(SPUISD::SELB, VecVT,
                  Op0Vec,
                  DAG.getConstant(0, VecVT),
                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
                              DAG.getConstant(0xff00ULL, MVT::i16)));
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
                                               MaskLower, ShiftAmtBytes),
                                   ShiftAmtBits));
  }

  case ISD::SRL: {
    MVT VT = Op.getValueType();
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
                                   Op0, ShiftAmtBytes),
                       ShiftAmtBits);
  }

  case ISD::SRA: {
    // Promote Op0 to vector
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftVT = ShiftAmt.getValueType();

    // Negate variable shift amounts
    if (!isa<ConstantSDNode>(ShiftAmt)) {
      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
                             DAG.getConstant(0, ShiftVT), ShiftAmt);
    }

    SDValue UpperHalfSign =
      DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
                                          Op0, DAG.getConstant(31, MVT::i32))));
    SDValue UpperHalfSignMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
    SDValue UpperLowerMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
                  DAG.getConstant(0xff00, MVT::i16));
    SDValue UpperLowerSelect =
      DAG.getNode(SPUISD::SELB, MVT::v2i64,
                  UpperHalfSignMask, Op0, UpperLowerMask);
    SDValue RotateLeftBytes =
      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
                  UpperLowerSelect, ShiftAmt);
    SDValue RotateLeftBits =
      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
                  RotateLeftBytes, ShiftAmt);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       RotateLeftBits);
  }
  }

  return SDValue();
}

//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  MVT VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      SDValue tcVec[16];
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}

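// Illustrative example for the rewrite above: an (or v16i8 X, <splat 0x42>)
// whose constant operand passes the splat check is re-emitted with
// target-constant elements, which lets instruction selection match the
// byte-immediate forms (ANDBI/ORBI/XORBI) instead of materializing the
// constant vector in a register.
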
//! Lower i32 multiplication
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
                        unsigned Opc) {
  switch (VT.getSimpleVT()) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << Op.getValueType().getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::i32: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDValue();
}

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
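
    // Horizontal reduction of the four byte counts c (one per byte of the
    // word): t = c + (c >> 16), then (t + (t >> 8)) & 0xff. For example,
    // 0xF0F00003 has per-byte counts 0x04040002, and the reduction yields
    // 4 + 4 + 0 + 2 = 10.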
    SDValue Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}

//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:
  SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
}

//! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();
  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

  SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);

  unsigned maskLow;
  unsigned maskHigh;

  // Create shuffle mask
  switch (Op0VT.getSimpleVT()) {
  case MVT::i128:
    switch (simpleVT) {
    case MVT::i64:
      // least significant doubleword of quadword
      maskHigh = 0x08090a0b;
      maskLow = 0x0c0d0e0f;
      break;
    case MVT::i32:
      // least significant word of quadword
      maskHigh = maskLow = 0x0c0d0e0f;
      break;
    case MVT::i16:
      // least significant halfword of quadword
      maskHigh = maskLow = 0x0e0f0e0f;
      break;
    case MVT::i8:
      // least significant byte of quadword
      maskHigh = maskLow = 0x0f0f0f0f;
      break;
    default:
      cerr << "Truncation to illegal type!";
      abort();
    }
    break;
  case MVT::i64:
    switch (simpleVT) {
    case MVT::i32:
      // least significant word of doubleword
      maskHigh = maskLow = 0x04050607;
      break;
    case MVT::i16:
      // least significant halfword of doubleword
      maskHigh = maskLow = 0x06070607;
      break;
    case MVT::i8:
      // least significant byte of doubleword
      maskHigh = maskLow = 0x07070707;
      break;
    default:
      cerr << "Truncation to illegal type!";
      abort();
    }
    break;
  case MVT::i32:
  case MVT::i16:
    switch (simpleVT) {
    case MVT::i16:
      // least significant halfword of word
      maskHigh = maskLow = 0x02030203;
      break;
    case MVT::i8:
      // least significant byte of word/halfword
      maskHigh = maskLow = 0x03030303;
      break;
    default:
      cerr << "Truncation to illegal type!";
      abort();
    }
    break;
  default:
    cerr << "Trying to lower truncation from illegal type!";
    abort();
  }
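
  // For instance, truncating i64 to i32 builds the mask
  // { 0x04050607, 0x04050607, ... }: every word of the result is filled with
  // bytes 4..7 of the promoted source, so the doubleword's low word lands in
  // the i32 preferred slot.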
  // Use a shuffle to perform the truncation
  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                 DAG.getConstant(maskHigh, MVT::i32),
                                 DAG.getConstant(maskLow, MVT::i32),
                                 DAG.getConstant(maskHigh, MVT::i32),
                                 DAG.getConstant(maskLow, MVT::i32));

  SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                                     PromoteScalar, PromoteScalar, shufMask);

  return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                     DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
}

//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::EXTLOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::BRCOND:
    return LowerBRCOND(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8, i64 math ops:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
    else if (VT == MVT::i64)
      return LowerI64Math(Op, DAG, Opc);
    break;
  }

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT.isVector())
      return LowerVectorMUL(Op, DAG);
    else if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
    else
      return LowerMUL(Op, DAG, VT, Opc);

  case ISD::FDIV:
    if (VT == MVT::f32 || VT == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
    // This is probably a libcall
    else if (Op.getValueType() == MVT::f64)
      return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  }

  return SDValue();
}

void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op01 = Op0.getOperand(1);
      if (Op01.getOpcode() == ISD::Constant
          || Op01.getOpcode() == ISD::TargetConstant) {
        // (add <const>, (SPUindirect <arg>, <const>)) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

#if !defined(NDEBUG)
        if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
          cerr << "\n"
               << "Replace: (add " << CN0->getZExtValue() << ", "
               << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
               << "With: (SPUindirect <arg>, "
               << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
        }
#endif

        return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                           Op0.getOperand(0), combinedConst);
      }
    } else if (isa<ConstantSDNode>(Op0)
               && Op1.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op11 = Op1.getOperand(1);
      if (Op11.getOpcode() == ISD::Constant
          || Op11.getOpcode() == ISD::TargetConstant) {
        // (add (SPUindirect <arg>, <const>), <const>) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);

        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With: (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");

        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
                           Op1.getOperand(0), combinedConst);
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith: ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith: ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS: {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1)) {
      // Kill degenerate vector shifts:
      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
      if (CN->getZExtValue() == 0) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PROMOTE_SCALAR: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUpromote_scalar and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUpromote_scalar (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith: ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }
#endif

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
#endif

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;

#if 0
  case CALL:
  case SHUFB:
  case SHUFFLE_MASK:
  case CNTB:
#endif

  case SPUISD::PROMOTE_SCALAR: {
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    unsigned Op0VTBits = Op0VT.getSizeInBits();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    KnownZero |= APInt(Op0VTBits, ~InMask, false);
    KnownOne |= APInt(Op0VTBits, InMask, false);
    break;
  }

  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT: {
    MVT OpVT = Op.getValueType();
    unsigned OpVTBits = OpVT.getSizeInBits();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    KnownZero |= APInt(OpVTBits, ~InMask, false);
    KnownOne |= APInt(OpVTBits, InMask, false);
    break;
  }

#if 0
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  case SPUISD::FPInterp:
  case SPUISD::FPRecipEst:
  case SPUISD::SEXT32TO64:
#endif
  }
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}