1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by a team from the Computer Systems Research
6 // Department at The Aerospace Corporation and is distributed under the
7 // University of Illinois Open Source License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
11 // This file implements the SPUTargetLowering class.
13 //===----------------------------------------------------------------------===//
15 #include "SPURegisterNames.h"
16 #include "SPUISelLowering.h"
17 #include "SPUTargetMachine.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SSARegMap.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
// Lazily-populated map from SPUISD opcode to a printable node name; filled
// in on the first call to getTargetNodeName().  File-scope mutable state.
std::map<unsigned, const char *> node_names;

//! MVT::ValueType mapping to useful data for Cell SPU
// Each entry associates a value type with the byte offset of that type's
// "preferred slot" within a 16-byte SPU register, used when rotating a
// loaded quadword so the requested element lands in the preferred slot.
struct valtype_map_s {
  const MVT::ValueType valtype;   // value type being described
  const int prefslot_byte;        // preferred-slot byte offset for valtype
// Table of per-type preferred-slot data searched by getValueTypeMapEntry().
const valtype_map_s valtype_map[] = {
// Entry count for valtype_map; used as the search loop bound below.
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for the entry describing \p VT.  Returns a
// pointer to the matching entry, or (per the diagnostic below) NULL when the
// value type has no preferred-slot mapping.
const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  const valtype_map_s *retval = 0;

  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      // Found the entry for VT.
      retval = valtype_map + i;
  // Diagnostic emitted when VT is not present in the table:
  cerr << "getValueTypeMapEntry returns NULL for "
       << MVT::getValueTypeString(VT)
//! Predicate that returns true if operand is a memory target
  \arg Op Operand to test
  \return true if the operand is a memory target (i.e., global
  address, external symbol, constant pool) or an existing D-Form
bool isMemoryOperand(const SDOperand &Op)
  const unsigned Opc = Op.getOpcode();
  // Pure opcode test — accepts both the generic address node kinds and
  // their already-lowered Target* counterparts, plus the SPU-specific
  // D-Form address node.  No operands of Op are inspected.
  return (Opc == ISD::GlobalAddress
          || Opc == ISD::GlobalTLSAddress
          || Opc == ISD::FrameIndex
          || Opc == ISD::JumpTable
          || Opc == ISD::ConstantPool
          || Opc == ISD::ExternalSymbol
          || Opc == ISD::TargetGlobalAddress
          || Opc == ISD::TargetGlobalTLSAddress
          || Opc == ISD::TargetFrameIndex
          || Opc == ISD::TargetJumpTable
          || Opc == ISD::TargetConstantPool
          || Opc == ISD::TargetExternalSymbol
          || Opc == SPUISD::DFormAddr);
// SPUTargetLowering constructor: registers the SPU register classes and
// configures, per (operation, type) pair, how the legalizer should treat
// each node — Legal, Promote, Expand, or Custom — for the Cell SPU target.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
  // Fold away setcc operations if possible.
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);
  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);
  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  // NOTE(review): the upper bound is exclusive, so MVT::f128 itself is not
  // custom-lowered by this loop — confirm that is intended.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions (comment previously said "PowerPC" —
  // a copy/paste remnant from the PPC backend)
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);
  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);
  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);
  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): FP_TO_SINT/i64 and SINT_TO_FP/i64 were already registered
  // as Custom above — these two calls are redundant re-registrations.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // NOTE(review): this overrides the earlier 'Legal' setting for
  // FP_TO_UINT/i32 above — presumably the later registration wins; verify.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  // v16i8 overrides the blanket Legal/Custom settings from the loop above:
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(SPU::R1);
  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);
  computeRegisterProperties();
// Returns the printable name for an SPUISD target node opcode, or 0 when the
// opcode is unknown.  The file-scope node_names map is populated lazily on
// the first call; no synchronization is visible here, so concurrent first
// calls would race — acceptable for debug-printing use.
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  // Unknown opcodes yield a null name rather than asserting.
  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
  return ((i != node_names.end()) ? i->second : 0);
424 //===----------------------------------------------------------------------===//
425 // Calling convention code:
426 //===----------------------------------------------------------------------===//
428 #include "SPUGenCallingConv.inc"
430 //===----------------------------------------------------------------------===//
431 // LowerOperation implementation
432 //===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  // VT is the in-memory type; OpVT is the type the node produces (they can
  // differ for extending loads).
  MVT::ValueType VT = LN->getLoadedVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  // Preferred-slot data for VT; used to compute the rotation amount below.
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
    ExtType = ISD::ZEXTLOAD;

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand rot_op, rotamt;
    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
    // reg+imm address: split into base pointer plus constant byte offset.
    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
             && "LowerLOAD: ISD::ADD operand 1 is not constant");
      c_offset = (int) CN->getValue();
      // Only the low 4 bits of the offset affect position within the quadword.
      c_rotamt = (int) (c_offset & 0xf);
      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
      // (no constant offset case) rotate purely by the preferred-slot offset:
      c_rotamt = -vtm->prefslot_byte;
    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (isMemoryOperand(ptrp))
        // Return modified D-Form address for pointer:
        ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                           ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
        return DAG.getLoad(VT, LN->getChain(), ptrp,
                           LN->getSrcValue(), LN->getSrcValueOffset(),
                           LN->isVolatile(), 16);
        // Extending-load variant of the aligned case:
        return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
                              LN->getSrcValueOffset(), OpVT,
                              LN->isVolatile(), 16);
    // Realign the base pointer, with a D-Form address
    if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
      basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                          ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
    // Load the containing 16-byte chunk, then rotate the wanted element into
    // the preferred slot:
    rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);
    the_chain = rot_op.getValue(1);
    rotamt = DAG.getConstant(c_rotamt, MVT::i16);
    SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
    the_chain = result.getValue(1);
    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      scalarvts = DAG.getVTList(VT, MVT::Other);
      scalarvts = DAG.getVTList(OpVT, MVT::Other);
    // Reinterpret the rotated v16i8 as the element's vector type, then pull
    // out element 0 (the preferred slot) as the scalar result:
    result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
    result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
    the_chain = result.getValue(1);
    // Handle the sign and zero-extending loads for i1 and i8:
    if (ExtType == ISD::SEXTLOAD) {
      NewOpC = (OpVT == MVT::i1
                ? SPUISD::EXTRACT_I1_SEXT
                : SPUISD::EXTRACT_I8_SEXT);
    } else if (ExtType == ISD::ZEXTLOAD) {
      NewOpC = (OpVT == MVT::i1
                ? SPUISD::EXTRACT_I1_ZEXT
                : SPUISD::EXTRACT_I8_ZEXT);
    result = DAG.getNode(NewOpC, OpVT, result);
    // Package value + chain in an LDRESULT node so both are returned:
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };
    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
    // Misaligned 16-byte load:
    if (basep.getOpcode() == ISD::LOAD) {
      LN = cast<LoadSDNode>(basep);
      if (LN->getAlignment() == 16) {
        // We can verify that we're really loading from a 16-byte aligned
        // chunk. Encapsulate basep as a D-Form address and return a new
        basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                            DAG.getConstant(0, PtrVT));
        return DAG.getLoad(VT, LN->getChain(), basep,
                           LN->getSrcValue(), LN->getSrcValueOffset(),
                           LN->isVolatile(), 16);
        return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                              LN->getSrcValue(), LN->getSrcValueOffset(),
                              OpVT, LN->isVolatile(), 16);
    // Catch all other cases where we can't guarantee that we have a
    // 16-byte aligned entity, which means resorting to an X-form
    SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
    SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
    SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);
    // Recombine hi/lo halves into a full pointer:
    ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
    SDOperand alignLoad =
      DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                  LN->getSrcValue(), LN->getSrcValueOffset(),
                  LN->isVolatile(), 16);
    // Shuffle mask selecting the element addressed by ptrp:
    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
    result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                         DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
    result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };
    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
  // Pre/post-indexed addressing modes are not supported:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
/// Custom lower stores for CellSPU
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  // StVT is the in-memory type; differs from VT for truncating stores.
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    if (basep.getOpcode() == ISD::FrameIndex) {
      // FrameIndex nodes are always properly aligned. Really.
    // reg+imm address: split into base pointer plus constant byte offset.
    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT;
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
    // the actual dform addr offs($reg).
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant((offset & ~0xf), PtrVT));
    // Create the 16-byte aligned vector load
    // (read-modify-write: load the containing chunk before inserting)
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);
    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();
    if (   (theValue.getOpcode() == ISD::AssertZext
        || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      theValue = theValue.getOperand(0);
    // Mask selecting where, within the quadword, the element is inserted:
    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              DAG.getConstant((offset & 0xf), PtrVT)));
    // Shuffle the scalar into the loaded chunk, then store the whole chunk:
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
  // Pre/post-indexed addressing modes are not supported.
  // NOTE(review): the diagnostic below says "LowerLOAD"/"LoadSDNode" but this
  // is LowerSTORE handling a StoreSDNode — copy/paste error in the message
  // text (runtime string intentionally left unchanged in this doc pass).
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) SN->getAddressingMode() << "\n";
/// Generate the address of a constant pool entry.
// Static relocation only: with small memory the target constant-pool node is
// usable directly; with large memory an explicit Hi/Lo address pair is built
// and summed.  Any other relocation model is rejected (see message below).
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry.  Mirrors LowerConstantPool:
// static relocation only; small memory uses the target jump-table node
// directly, large memory builds and sums a Hi/Lo address pair.
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global.  Same static-relocation-only pattern as
// LowerConstantPool/LowerJumpTable above.
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
  cerr << "LowerGlobalAddress: Relocation model other than static not "
//! Custom lower i64 integer constants
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    // Materialize the constant as a v2i64 splat, then extract element 0
    // (the preferred slot) back out as the scalar i64 result.
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  // Any other type reaching here is a lowering bug:
  cerr << "LowerConstant: unhandled constant type "
       << MVT::getValueTypeString(VT)
//! Custom lower single precision floating point constants
 "float" immediates can be lowered as if they were unsigned 32-bit integers.
 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f32) {
    // f32: emit the dedicated SFPConstant pseudo with the raw target constant.
    float targetConst = FP->getValueAPF().convertToFloat();
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(targetConst, VT));
  } else if (VT == MVT::f64) {
    // f64: reuse the i64 constant path, then bitcast the bits back to double.
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
// Lower ISD::FORMAL_ARGUMENTS for the Cell SPU: copy each incoming argument
// out of the SPU argument registers (R8C/R16C/R32C/R64C/R32FP/R64FP/VECREG
// classes, depending on the argument's value type) or, once the argument
// registers are exhausted, load it from its fixed stack slot. Returns a
// MERGE_VALUES node carrying all argument values plus the updated chain, and
// sets VarArgsFrameIndex for later expansion of llvm.va_start.
// NOTE(review): this listing omits source lines (gaps in the baked-in
// numbering), so the switch's case labels, the register-index increments, and
// the needsLoad/else branches are not visible here — verify against the
// complete file before changing control flow.
897 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
899 MachineFunction &MF = DAG.getMachineFunction();
900 MachineFrameInfo *MFI = MF.getFrameInfo();
901 SSARegMap *RegMap = MF.getSSARegMap();
902 SmallVector<SDOperand, 8> ArgValues;
903 SDOperand Root = Op.getOperand(0);
// Operand 2 of FORMAL_ARGUMENTS encodes whether the function is variadic.
904 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
906 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
907 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Arguments spill just above the minimal [LR]/[SP] linkage area.
909 unsigned ArgOffset = SPUFrameInfo::minStackSize();
910 unsigned ArgRegIdx = 0;
911 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
913 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
915 // Add DAG nodes to load the arguments or copy them out of registers.
916 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
918 bool needsLoad = false;
919 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
920 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
// Default switch arm: unhandled argument types are reported on stderr.
924 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
925 << MVT::getValueTypeString(ObjectVT)
// NOTE(review): each "if" below guards one value-type case of a switch whose
// labels are omitted from this listing. Presumably the else-branch (omitted)
// sets needsLoad = true — TODO confirm.
930 if (!isVarArg && ArgRegIdx < NumArgRegs) {
931 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
932 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
933 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
940 if (!isVarArg && ArgRegIdx < NumArgRegs) {
941 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
942 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
943 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
950 if (!isVarArg && ArgRegIdx < NumArgRegs) {
951 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
952 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
953 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
960 if (!isVarArg && ArgRegIdx < NumArgRegs) {
961 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
962 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
963 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
970 if (!isVarArg && ArgRegIdx < NumArgRegs) {
971 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
972 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
973 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
980 if (!isVarArg && ArgRegIdx < NumArgRegs) {
981 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
982 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
983 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments keep their full vector type in a VECREG.
994 if (!isVarArg && ArgRegIdx < NumArgRegs) {
995 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
996 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
997 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1005 // We need to load the argument to a virtual register if we determined above
1006 // that we ran out of physical registers of the appropriate type
1008 // If the argument is actually used, emit a load from the right stack
// hasNUsesOfValue(0, ArgNo): the value has zero uses -> avoid a dead load.
1010 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1011 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1012 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1013 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1015 // Don't emit a dead load.
1016 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1019 ArgOffset += StackSlotSize;
1022 ArgValues.push_back(ArgVal);
1025 // If the function takes variable number of arguments, make a frame index for
1026 // the start of the first vararg value... for expansion of llvm.va_start.
1028 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1030 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1031 // If this function is vararg, store any remaining integer argument regs to
1032 // their spots on the stack so that they may be loaded by dereferencing the
1033 // result of va_next.
1034 SmallVector<SDOperand, 8> MemOps;
1035 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1036 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1037 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1038 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
// Chain the store off the CopyFromReg's output chain (Val.getValue(1)).
1039 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1040 MemOps.push_back(Store);
1041 // Increment the address by four for the next argument to store
1042 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1043 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all the vararg spill stores together into a single token chain.
1045 if (!MemOps.empty())
1046 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
// The chain is returned as the last merged value.
1049 ArgValues.push_back(Root);
1051 // Return the new list of results.
1052 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1053 Op.Val->value_end());
1054 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1057 /// isLSAAddress - Return the immediate to use if the specified
1058 /// value is representable as a LSA address.
// Returns the constant >> 2 as an i32 SDNode when Op is a word-aligned
// constant whose top 14 bits are a sign-extension of the remainder; returns
// 0 (null) otherwise. NOTE(review): the listing omits lines between 1060 and
// 1063 — presumably the "if (!C) return 0;" guard for the dyn_cast; verify
// against the complete file.
1059 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1060 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1063 int Addr = C->getValue();
// NOTE(review): "<< 14 >> 14" on a signed int relies on implementation-
// defined arithmetic right shift (and signed-overflow on the left shift);
// standard LLVM practice but worth keeping in mind if this is modernized.
1064 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1065 (Addr << 14 >> 14) != Addr)
1066 return 0; // Top 14 bits have to be sext of immediate.
1068 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL for the Cell SPU: marshal outgoing arguments into the SPU
// argument registers (overflowing onto the stack), wrap the callee in the
// appropriate addressing node (PC-relative for global symbols, external
// symbol, or a munged absolute LSA address), emit the call, and copy any
// return values back out of R3/R4. Returns a MERGE_VALUES of the results
// and the final chain.
// NOTE(review): this listing omits source lines, so several case labels,
// else-branches, declarations (e.g. InFlag, FSMBOp) and closing braces are
// not visible — verify against the complete file before editing.
1073 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1074 SDOperand Chain = Op.getOperand(0);
1076 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1077 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1079 SDOperand Callee = Op.getOperand(4);
// Operands 5.. come in (value, flag) pairs, hence the /2.
1080 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1081 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1082 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1083 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1085 // Handy pointer type
1086 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1088 // Accumulate how many bytes are to be pushed on the stack, including the
1089 // linkage area, and parameter passing area. According to the SPU ABI,
1090 // we minimally need space for [LR] and [SP]
1091 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1093 // Set up a copy of the stack pointer for use loading and storing any
1094 // arguments that may not fit in the registers available for argument
1096 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1098 // Figure out which arguments are going to go in registers, and which in
1100 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1101 unsigned ArgRegIdx = 0;
1103 // Keep track of registers passing arguments
1104 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1105 // And the arguments passed on the stack
1106 SmallVector<SDOperand, 8> MemOpChains;
1108 for (unsigned i = 0; i != NumOps; ++i) {
1109 SDOperand Arg = Op.getOperand(5+2*i);
1111 // PtrOff will be used to store the current argument to the stack if a
1112 // register cannot be found for it.
1113 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1114 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1116 switch (Arg.getValueType()) {
1117 default: assert(0 && "Unexpected ValueType for argument!");
// NOTE(review): the case labels grouping the three register/stack arms below
// are omitted from this listing.
1121 if (ArgRegIdx != NumArgRegs) {
1122 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1124 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1125 ArgOffset += StackSlotSize;
1130 if (ArgRegIdx != NumArgRegs) {
1131 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1133 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1134 ArgOffset += StackSlotSize;
1141 if (ArgRegIdx != NumArgRegs) {
1142 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1144 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1145 ArgOffset += StackSlotSize;
1151 // Update number of stack bytes actually used, insert a call sequence start
1152 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1153 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1155 if (!MemOpChains.empty()) {
1156 // Adjust the stack pointer for the stack arguments.
1157 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1158 &MemOpChains[0], MemOpChains.size());
1161 // Build a sequence of copy-to-reg nodes chained together with token chain
1162 // and flag operands which copy the outgoing args into the appropriate regs.
// NOTE(review): InFlag's declaration is omitted from this listing.
1164 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1165 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1167 InFlag = Chain.getValue(1);
1170 std::vector<MVT::ValueType> NodeTys;
1171 NodeTys.push_back(MVT::Other); // Returns a chain
1172 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1174 SmallVector<SDOperand, 8> Ops;
1175 unsigned CallOpc = SPUISD::CALL;
1177 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1178 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1179 // node so that legalize doesn't hack it.
1180 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1181 GlobalValue *GV = G->getGlobal();
1182 unsigned CalleeVT = Callee.getValueType();
1184 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1185 // style calls, otherwise, external symbols are BRASL calls.
1187 // This may be an unsafe assumption for JIT and really large compilation
// Declarations (no body in this module) take the plain GlobalAddress path;
// defined functions get a PC-relative address node.
1189 if (GV->isDeclaration()) {
1190 Callee = DAG.getGlobalAddress(GV, CalleeVT);
1192 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1193 DAG.getTargetGlobalAddress(GV, CalleeVT),
1194 DAG.getConstant(0, PtrVT));
1196 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1197 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1198 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1199 // If this is an absolute destination address that appears to be a legal
1200 // local store address, use the munged value.
1201 Callee = SDOperand(Dest, 0);
1203 Ops.push_back(Chain);
1204 Ops.push_back(Callee);
1206 // Add argument registers to the end of the list so that they are known live
1208 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1209 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1210 RegsToPass[i].second.getValueType()));
1213 Ops.push_back(InFlag);
1214 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1215 InFlag = Chain.getValue(1);
1217 SDOperand ResultVals[3];
1218 unsigned NumResults = 0;
1221 // If the call has results, copy the values out of the ret val registers.
// Per the SPU ABI, scalar/vector return values come back in R3 (and R4 for
// the expanded half of an i64 returned as two i32s).
1222 switch (Op.Val->getValueType(0)) {
1223 default: assert(0 && "Unexpected ret value!");
1224 case MVT::Other: break;
// NOTE(review): the case labels for the arms below are omitted from this
// listing; the first arm handles an i64 split into two i32 halves (R3/R4).
1226 if (Op.Val->getValueType(1) == MVT::i32) {
1227 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1228 ResultVals[0] = Chain.getValue(0);
1229 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1230 Chain.getValue(2)).getValue(1);
1231 ResultVals[1] = Chain.getValue(0);
1233 NodeTys.push_back(MVT::i32);
1235 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1236 ResultVals[0] = Chain.getValue(0);
1239 NodeTys.push_back(MVT::i32);
1242 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1243 ResultVals[0] = Chain.getValue(0);
1245 NodeTys.push_back(MVT::i64);
1249 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1250 InFlag).getValue(1);
1251 ResultVals[0] = Chain.getValue(0);
1253 NodeTys.push_back(Op.Val->getValueType(0));
1260 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1261 InFlag).getValue(1);
1262 ResultVals[0] = Chain.getValue(0);
1264 NodeTys.push_back(Op.Val->getValueType(0));
1268 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1269 DAG.getConstant(NumStackBytes, PtrVT));
1270 NodeTys.push_back(MVT::Other);
1272 // If the function returns void, just return the chain.
1273 if (NumResults == 0)
1276 // Otherwise, merge everything together with a MERGE_VALUES node.
1277 ResultVals[NumResults++] = Chain;
1278 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1279 ResultVals, NumResults);
// Select the requested result number of the original CALL node.
1280 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the SPU return-value calling convention (RetCC_SPU) to
// assign return values to registers, mark those registers live-out on the
// first return lowered for the function, copy each returned value into its
// assigned register, and emit SPUISD::RET_FLAG.
// NOTE(review): the listing omits the declaration of Flag (around original
// line 1300) and the "if (Flag.Val)" guard selecting between the two returns.
1284 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1285 SmallVector<CCValAssign, 16> RVLocs;
1286 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1287 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1288 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1289 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1291 // If this is the first return lowered for this function, add the regs to the
1292 // liveout set for the function.
1293 if (DAG.getMachineFunction().liveout_empty()) {
1294 for (unsigned i = 0; i != RVLocs.size(); ++i)
1295 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1298 SDOperand Chain = Op.getOperand(0);
1301 // Copy the result values into the output registers.
1302 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1303 CCValAssign &VA = RVLocs[i];
1304 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands come in (value, signness-flag) pairs, hence i*2+1.
1305 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1306 Flag = Chain.getValue(1);
// With a glue value: return chained + flagged; otherwise just the chain.
1310 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1312 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1316 //===----------------------------------------------------------------------===//
1317 // Vector related lowering:
1318 //===----------------------------------------------------------------------===//
// Return the single ConstantSDNode replicated across all non-undef elements
// of build-vector N, or 0 if the elements differ, are non-constant, or are
// all undef. NOTE(review): the listing omits the "OpVal.Val == 0" first-value
// check (around original line 1327) and the return of CN inside the dyn_cast
// (around line 1335) — verify against the complete file.
1320 static ConstantSDNode *
1321 getVecImm(SDNode *N) {
1322 SDOperand OpVal(0, 0);
1324 // Check to see if this buildvec has a single non-undef value in its elements.
1325 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1326 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1328 OpVal = N->getOperand(i);
1329 else if (OpVal != N->getOperand(i))
1333 if (OpVal.Val != 0) {
1334 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1339 return 0; // All UNDEF: use implicit def.; not Constant node
1342 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1343 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// Returns the splatted value as a constant of ValueType when it fits in an
// unsigned 18-bit immediate (<= 0x3ffff); presumably returns SDOperand() via
// an omitted fall-through otherwise — TODO confirm against the complete file.
1345 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1346 MVT::ValueType ValueType) {
1347 if (ConstantSDNode *CN = getVecImm(N)) {
1348 uint64_t Value = CN->getValue();
1349 if (Value <= 0x3ffff)
1350 return DAG.getConstant(Value, ValueType);
1356 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1357 /// and the value fits into a signed 16-bit constant, and if so, return the
// For each supported element type (i32, i16, i64) the splat value is
// sign-extended from its low 16 bits and compared against the original; a
// match means the value is representable as a signed 16-bit immediate.
1359 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1360 MVT::ValueType ValueType) {
1361 if (ConstantSDNode *CN = getVecImm(N)) {
1362 if (ValueType == MVT::i32) {
1363 int Value = (int) CN->getValue();
// Mask to 16 bits, then shift up/down to sign-extend from bit 15.
1364 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1366 if (Value == SExtValue)
1367 return DAG.getConstant(Value, ValueType);
1368 } else if (ValueType == MVT::i16) {
1369 short Value = (short) CN->getValue();
1370 int SExtValue = ((int) Value << 16) >> 16;
1372 if (Value == (short) SExtValue)
1373 return DAG.getConstant(Value, ValueType);
1374 } else if (ValueType == MVT::i64) {
1375 int64_t Value = CN->getValue();
1376 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1378 if (Value == SExtValue)
1379 return DAG.getConstant(Value, ValueType);
1386 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1387 /// and the value fits into a signed 10-bit constant, and if so, return the
// Uses isS10Constant() to test signed-10-bit representability for i32 and
// (after truncation to short) i16 element types.
1389 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1390 MVT::ValueType ValueType) {
1391 if (ConstantSDNode *CN = getVecImm(N)) {
1392 int Value = (int) CN->getValue();
1393 if ((ValueType == MVT::i32 && isS10Constant(Value))
1394 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1395 return DAG.getConstant(Value, ValueType);
1401 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1402 /// and the value fits into a signed 8-bit constant, and if so, return the
1405 /// @note: The incoming vector is v16i8 because that's the only way we can load
1406 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1408 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1409 MVT::ValueType ValueType) {
1410 if (ConstantSDNode *CN = getVecImm(N)) {
1411 int Value = (int) CN->getValue();
// For i16: accept only when the high byte equals the low byte (i.e., the
// 16-bit value is a replicated 8-bit pattern), and return the single byte.
1412 if (ValueType == MVT::i16
1413 && Value <= 0xffff /* truncated from uint64_t */
1414 && ((short) Value >> 8) == ((short) Value & 0xff))
1415 return DAG.getConstant(Value & 0xff, ValueType);
// For i8: accept any value that already fits in one byte unchanged.
1416 else if (ValueType == MVT::i8
1417 && (Value & 0xff) == Value)
1418 return DAG.getConstant(Value, ValueType);
1424 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1425 /// and the value fits into a signed 16-bit constant, and if so, return the
// Matches splat values whose low 16 bits are zero (ILHU loads a halfword
// into the upper 16 bits); returns the value shifted down by 16.
1427 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1428 MVT::ValueType ValueType) {
1429 if (ConstantSDNode *CN = getVecImm(N)) {
1430 uint64_t Value = CN->getValue();
1431 if ((ValueType == MVT::i32
1432 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1433 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1434 return DAG.getConstant(Value >> 16, ValueType);
1440 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splatted value truncated to 32 bits as an i32 constant, or
// (via an omitted fall-through) no value when N is not a constant splat.
1441 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1442 if (ConstantSDNode *CN = getVecImm(N)) {
1443 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1449 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
// FIXME(review): the "(unsigned)" cast truncates the 64-bit splat value to
// 32 bits before widening it back to i64, silently zeroing the high word of
// any constant wider than 32 bits. It should almost certainly pass
// CN->getValue() through unmodified — confirm and fix in the full file.
// (The doxygen comment above also mis-named this function as get_v4i32_imm;
// corrected here.)
1450 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1451 if (ConstantSDNode *CN = getVecImm(N)) {
1452 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1458 // If this is a vector of constants or undefs, get the bits. A bit in
1459 // UndefBits is set if the corresponding element of the vector is an
1460 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1461 // zero. Return true if this is not an array of constants, false if it is.
// Packs the (up to 128-bit) build-vector BV into two uint64_t words.
// NOTE(review): the listing omits lines after 1488 (presumably "return true;"
// for the nonconstant-element case) and the final "return false;".
1463 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1464 uint64_t UndefBits[2]) {
1465 // Start with zero'd results.
1466 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1468 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1469 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1470 SDOperand OpVal = BV->getOperand(i);
// PartNo selects the destination uint64_t; SlotNo the element slot within it
// (elements are packed big-endian style: earlier elements land higher).
1472 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1473 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1475 uint64_t EltBits = 0;
1476 if (OpVal.getOpcode() == ISD::UNDEF) {
1477 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1478 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1480 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1481 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1482 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants are stored as their raw IEEE bit patterns.
1483 const APFloat &apf = CN->getValueAPF();
1484 EltBits = (CN->getValueType(0) == MVT::f32
1485 ? FloatToBits(apf.convertToFloat())
1486 : DoubleToBits(apf.convertToDouble()));
1488 // Nonconstant element.
1492 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1495 //printf("%llx %llx %llx %llx\n",
1496 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1500 /// If this is a splat (repetition) of a value across the whole vector, return
1501 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1502 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1503 /// SplatSize = 1 byte.
// Narrows the 128-bit value stepwise (64 -> 32 -> 16 -> 8 bits), at each step
// checking that the two halves agree modulo undef bits, and stops narrowing
// once the requested MinSplatBits is reached.
// NOTE(review): the listing omits the MinSplatBits/SplatSize parameter lines
// and the SplatBits/SplatSize assignments in several branches — verify
// against the complete file.
1504 static bool isConstantSplat(const uint64_t Bits128[2],
1505 const uint64_t Undef128[2],
1507 uint64_t &SplatBits, uint64_t &SplatUndef,
1509 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1510 // the same as the lower 64-bits, ignoring undefs.
// OR the defined bits together; AND the undef masks (a bit is only undef in
// the folded value if it is undef in both halves).
1511 uint64_t Bits64 = Bits128[0] | Bits128[1];
1512 uint64_t Undef64 = Undef128[0] & Undef128[1];
1513 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1514 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1515 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1516 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1518 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1519 if (MinSplatBits < 64) {
1521 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1523 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1524 if (MinSplatBits < 32) {
1526 // If the top 16-bits are different than the lower 16-bits, ignoring
1527 // undefs, we have an i32 splat.
1528 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1529 if (MinSplatBits < 16) {
1530 // If the top 8-bits are different than the lower 8-bits, ignoring
1531 // undefs, we have an i16 splat.
1532 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1533 // Otherwise, we have an 8-bit splat.
1534 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1535 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1541 SplatUndef = Undef16;
1548 SplatUndef = Undef32;
1554 SplatBits = Bits128[0];
1555 SplatUndef = Undef128[0];
1561 return false; // Can't be a splat if two pieces don't match.
1564 // If this is a case we can't handle, return null and let the default
1565 // expansion code take care of it. If we CAN select this case, and if it
1566 // selects to a single instruction, return Op. Otherwise, if we can codegen
1567 // this case more efficiently than a constant pool load, lower it to the
1568 // sequence of ops that should be used.
// Lowers a constant-splat BUILD_VECTOR per element type: FP splats are
// rebuilt as integer splats and bitcast back; i8 widens to a v8i16 splat;
// i64 either builds a v4i32 splat (upper == lower word) or synthesizes a
// SHUFB of special/constant halves. Non-splat vectors return SDOperand().
// NOTE(review): the listing omits the switch's case labels, the SplatSize
// declaration, and several closing braces — verify against the full file.
1569 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1570 MVT::ValueType VT = Op.getValueType();
1571 // If this is a vector of constants or undefs, get the bits. A bit in
1572 // UndefBits is set if the corresponding element of the vector is an
1573 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1575 uint64_t VectorBits[2];
1576 uint64_t UndefBits[2];
1577 uint64_t SplatBits, SplatUndef;
1579 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1580 || !isConstantSplat(VectorBits, UndefBits,
1581 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1582 SplatBits, SplatUndef, SplatSize))
1583 return SDOperand(); // Not a constant vector, not a splat.
// v4f32 case: splat the raw 32-bit pattern as v4i32, then bitcast.
1588 uint32_t Value32 = SplatBits;
1589 assert(SplatSize == 4
1590 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1591 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1592 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1593 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1594 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 case: same trick with a 64-bit pattern and v2i64.
1598 uint64_t f64val = SplatBits;
1599 assert(SplatSize == 8
1600 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1601 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1602 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1603 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1604 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 case: SPU has no i8 splat; replicate the byte into each i16 lane.
1608 // 8-bit constants have to be expanded to 16-bits
1609 unsigned short Value16 = SplatBits | (SplatBits << 8);
1611 for (int i = 0; i < 8; ++i)
1612 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1613 return DAG.getNode(ISD::BIT_CONVERT, VT,
1614 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 case: widen an 8-bit splat pattern to 16 bits when needed.
1617 unsigned short Value16;
1619 Value16 = (unsigned short) (SplatBits & 0xffff);
1621 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1622 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1624 for (int i = 0; i < 8; ++i) Ops[i] = T;
1625 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 case: direct four-way splat.
1628 unsigned int Value = SplatBits;
1629 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1630 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 case: split into 32-bit halves and decide on a strategy below.
1633 uint64_t val = SplatBits;
1634 uint32_t upper = uint32_t(val >> 32);
1635 uint32_t lower = uint32_t(val);
1640 SmallVector<SDOperand, 16> ShufBytes;
1642 bool upper_special, lower_special;
1644 // NOTE: This code creates common-case shuffle masks that can be easily
1645 // detected as common expressions. It is not attempting to create highly
1646 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1648 // Detect if the upper or lower half is a special shuffle mask pattern:
1649 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1650 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1652 // Create lower vector if not a special pattern
1653 if (!lower_special) {
1654 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1655 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1656 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1657 LO32C, LO32C, LO32C, LO32C));
1660 // Create upper vector if not a special pattern
1661 if (!upper_special) {
1662 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1663 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1664 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1665 HI32C, HI32C, HI32C, HI32C));
1668 // If either upper or lower are special, then the two input operands are
1669 // the same (basically, one of them is a "don't care")
1674 if (lower_special && upper_special) {
1675 // Unhappy situation... both upper and lower are special, so punt with
1676 // a target constant:
1677 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1678 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control word: for special halves, encode the
// generator byte (0x80 = 0, 0xc0 = 0xff, 0xe0 = 0x80 in byte 0); otherwise
// select the source byte from HI32/LO32.
1682 for (int i = 0; i < 4; ++i) {
1683 for (int j = 0; j < 4; ++j) {
1685 bool process_upper, process_lower;
1688 process_upper = (upper_special && (i & 1) == 0);
1689 process_lower = (lower_special && (i & 1) == 1);
1691 if (process_upper || process_lower) {
1692 if ((process_upper && upper == 0)
1693 || (process_lower && lower == 0))
1695 else if ((process_upper && upper == 0xffffffff)
1696 || (process_lower && lower == 0xffffffff))
1698 else if ((process_upper && upper == 0x80000000)
1699 || (process_lower && lower == 0x80000000))
1700 val = (j == 0 ? 0xe0 : 0x80);
1702 val = i * 4 + j + ((i & 1) * 16);
1704 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1708 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1709 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1710 &ShufBytes[0], ShufBytes.size()));
1712 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1713 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1714 return DAG.getNode(ISD::BIT_CONVERT, VT,
1715 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1716 Zero, Zero, Zero, Zero));
1724 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1725 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1726 /// permutation vector, V3, is monotonically increasing with one "exception"
1727 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1728 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1729 /// In either case, the net result is going to eventually invoke SHUFB to
1730 /// permute/shuffle the bytes from V1 and V2.
1732 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1733 /// control word for byte/halfword/word insertion. This takes care of a single
1734 /// element move from V2 into V1.
1736 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// See the block comment above: detect a "monotonic with one inserted
// element" permutation and lower it to INSERT_MASK + SHUFB; otherwise emit
// a generic byte-level shuffle mask and a single SHUFB.
// NOTE(review): the listing omits the declarations of SrcElt/V2Elt and
// several case/else lines — verify against the complete file.
1737 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1738 SDOperand V1 = Op.getOperand(0);
1739 SDOperand V2 = Op.getOperand(1);
1740 SDOperand PermMask = Op.getOperand(2);
// An undef second operand means both inputs are effectively V1.
1742 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1744 // If we have a single element being moved from V1 to V2, this can be handled
1745 // using the C*[DX] compute mask instructions, but the vector elements have
1746 // to be monotonically increasing with one exception element.
1747 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1748 unsigned EltsFromV2 = 0;
// V2EltIdx0 = index of V2's first element in the concatenated (V1,V2)
// numbering; set per element width (16/8/4 — the assignments for the i8 and
// i16 branches are omitted from this listing).
1750 unsigned V2EltIdx0 = 0;
1751 unsigned CurrElt = 0;
1752 bool monotonic = true;
1753 if (EltVT == MVT::i8)
1755 else if (EltVT == MVT::i16)
1757 else if (EltVT == MVT::i32)
1760 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1762 for (unsigned i = 0, e = PermMask.getNumOperands();
1763 EltsFromV2 <= 1 && monotonic && i != e;
1766 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1769 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Element drawn from V2: remember its (byte-scaled) position.
1771 if (SrcElt >= V2EltIdx0) {
1773 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1774 } else if (CurrElt != SrcElt) {
1781 if (EltsFromV2 == 1 && monotonic) {
1782 // Compute mask and shuffle
1783 MachineFunction &MF = DAG.getMachineFunction();
1784 SSARegMap *RegMap = MF.getSSARegMap();
1785 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1786 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1787 // Initialize temporary register to 0
1788 SDOperand InitTempReg =
1789 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1790 // Copy register's contents as index in INSERT_MASK:
1791 SDOperand ShufMaskOp =
1792 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1793 DAG.getTargetConstant(V2Elt, MVT::i32),
1794 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1795 // Use shuffle mask in SHUFB synthetic instruction:
// Note the operand order V2, V1 — the inserted element comes from V2.
1796 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1798 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1799 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1801 SmallVector<SDOperand, 16> ResultMask;
1802 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1804 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1807 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1809 for (unsigned j = 0; j != BytesPerElement; ++j) {
1810 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1815 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1816 &ResultMask[0], ResultMask.size());
1817 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of the
// replicated constant (which later simplifies to a vector-register load);
// any other scalar is broadcast with the SPUISD::PROMOTE_SCALAR node.
// NOTE(review): the listing omits the declarations of n_copies and VT and
// the case labels of the second switch — verify against the complete file.
1821 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1822 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1824 if (Op0.Val->getOpcode() == ISD::Constant) {
1825 // For a constant, build the appropriate constant vector, which will
1826 // eventually simplify to a vector register load.
1828 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1829 SmallVector<SDOperand, 16> ConstVecValues;
1833 // Create a constant vector:
1834 switch (Op.getValueType()) {
1835 default: assert(0 && "Unexpected constant value type in "
1836 "LowerSCALAR_TO_VECTOR");
1837 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1838 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1839 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1840 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1841 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1842 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1845 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1846 for (size_t j = 0; j < n_copies; ++j)
1847 ConstVecValues.push_back(CValue);
1849 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1850 &ConstVecValues[0], ConstVecValues.size());
1852 // Otherwise, copy the value from one register to another:
1853 switch (Op0.getValueType()) {
1854 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// PROMOTE_SCALAR takes the scalar twice; presumably the second operand is a
// placeholder/duplicate required by the node's expected arity — TODO confirm.
1861 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1868 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1869 switch (Op.getValueType()) {
1871 SDOperand rA = Op.getOperand(0);
1872 SDOperand rB = Op.getOperand(1);
1873 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1874 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1875 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1876 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1878 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1882 // Multiply two v8i16 vectors (pipeline friendly version):
1883 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1884 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1885 // c) Use SELB to select upper and lower halves from the intermediate results
1887 // NOTE: We really want to move the FSMBI to earlier to actually get the
1888 // dual-issue. This code does manage to do this, even if it's a little on
1891 MachineFunction &MF = DAG.getMachineFunction();
1892 SSARegMap *RegMap = MF.getSSARegMap();
1893 SDOperand Chain = Op.getOperand(0);
1894 SDOperand rA = Op.getOperand(0);
1895 SDOperand rB = Op.getOperand(1);
1896 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1897 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1900 DAG.getCopyToReg(Chain, FSMBIreg,
1901 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1902 DAG.getConstant(0xcccc, MVT::i32)));
1905 DAG.getCopyToReg(FSMBOp, HiProdReg,
1906 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1908 SDOperand HHProd_v4i32 =
1909 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1910 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1912 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1913 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1914 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1915 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1917 DAG.getConstant(16, MVT::i16))),
1918 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1921 // This M00sE is N@stI! (apologies to Monty Python)
1923 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1924 // is to break it all apart, sign extend, and reassemble the various
1925 // intermediate products.
1927 MachineFunction &MF = DAG.getMachineFunction();
1928 SSARegMap *RegMap = MF.getSSARegMap();
1929 SDOperand Chain = Op.getOperand(0);
1930 SDOperand rA = Op.getOperand(0);
1931 SDOperand rB = Op.getOperand(1);
1932 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1933 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1935 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1936 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1937 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1940 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1941 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1942 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1944 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1946 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1949 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1950 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1952 SDOperand FSMBdef_2222 =
1953 DAG.getCopyToReg(Chain, FSMBreg_2222,
1954 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1955 DAG.getConstant(0x2222, MVT::i32)));
1957 SDOperand FSMBuse_2222 =
1958 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1960 SDOperand LoProd_1 =
1961 DAG.getCopyToReg(Chain, LoProd_reg,
1962 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1965 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1968 DAG.getNode(ISD::AND, MVT::v4i32,
1969 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1970 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1971 LoProdMask, LoProdMask,
1972 LoProdMask, LoProdMask));
1975 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1976 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1979 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1983 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1985 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1987 SDOperand HHProd_1 =
1988 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1990 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1991 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1992 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1995 DAG.getCopyToReg(Chain, HiProd_reg,
1996 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1998 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2002 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2003 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2005 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2006 DAG.getNode(ISD::OR, MVT::v4i32,
2011 cerr << "CellSPU: Unknown vector multiplication, got "
2012 << MVT::getValueTypeString(Op.getValueType())
// Lower FDIV for f32 (and v4f32 — the caller in LowerOperation routes both
// here; the VECREG branch below handles the vector case). SPU has no divide
// instruction, so A/B is computed as A * recip(B) with one refinement step:
//   BRcpl  = FPInterp(B, FPRecipEst(B))          -- improved 1/B estimate
//   result = A*BRcpl + BRcpl*(A - B*(A*BRcpl))   -- Newton-Raphson-style fixup
// The intermediates are pinned in virtual registers so each is computed once.
2021 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2022 MachineFunction &MF = DAG.getMachineFunction();
2023 SSARegMap *RegMap = MF.getSSARegMap();
2025 SDOperand A = Op.getOperand(0); // dividend
2026 SDOperand B = Op.getOperand(1); // divisor
2027 unsigned VT = Op.getValueType();
2029 unsigned VRegBR, VRegC;
// Scalar f32 uses the 32-bit FP register class; the other branch (its
// condition line is elided in this view) uses the 128-bit vector class.
2031 if (VT == MVT::f32) {
2032 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2033 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2035 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2036 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2038 // TODO: make sure we're feeding FPInterp the right arguments
2039 // Right now: fi B, frest(B)
2042 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl: refined reciprocal estimate of B, stored in VRegBR. Anchored on the
// entry node rather than a real chain (see the TODO below).
2044 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2045 DAG.getNode(SPUISD::FPInterp, VT, B,
2046 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2048 // Computes A * BRcpl and stores in a temporary register
2050 DAG.getCopyToReg(BRcpl, VRegC,
2051 DAG.getNode(ISD::FMUL, VT, A,
2052 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2053 // What's the Chain variable do? It's magic!
2054 // TODO: set Chain = Op(0).getEntryNode()
// Final answer: (A*BRcpl) + BRcpl * (A - B*(A*BRcpl)) — the second term
// cancels the error in the reciprocal estimate.
2056 return DAG.getNode(ISD::FADD, VT,
2057 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2058 DAG.getNode(ISD::FMUL, VT,
2059 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2060 DAG.getNode(ISD::FSUB, VT, A,
2061 DAG.getNode(ISD::FMUL, VT, B,
2062 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2065 // Expands double-precision FDIV
2066 // Expects two doubles as inputs X and Y, does a floating point
2067 // reciprocal estimate, and three iterations of Newton-Raphson
2068 // to increase accuracy.
2069 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2070 // MachineFunction &MF = DAG.getMachineFunction();
2071 // SSARegMap *RegMap = MF.getSSARegMap();
2073 // SDOperand X = Op.getOperand(0);
2074 // SDOperand Y = Op.getOperand(1);
// Lower EXTRACT_VECTOR_ELT with a constant element index. Element 0 of an
// i32/i64 vector already sits in the SPU "preferred slot" and is extracted
// directly; any other element is rotated into the preferred slot via a SHUFB
// whose 16-byte mask is constructed below.
2077 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2078 unsigned VT = Op.getValueType();
2079 SDOperand N = Op.getOperand(0);
2080 SDOperand Elt = Op.getOperand(1);
2081 SDOperand ShufMask[16];
2082 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2084 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2086 int EltNo = (int) C->getValue();
// Range-check the index against the element count for each scalar width.
// NOTE(review): the i32/i64 assert strings are off by one — EltNo >= 4 means
// "slot > 3", and EltNo >= 2 means "slot > 1". Messages only; conditions OK.
2089 if (VT == MVT::i8 && EltNo >= 16)
2090 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2091 else if (VT == MVT::i16 && EltNo >= 8)
2092 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2093 else if (VT == MVT::i32 && EltNo >= 4)
2094 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2095 else if (VT == MVT::i64 && EltNo >= 2)
2096 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2098 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2099 // i32 and i64: Element 0 is the preferred slot
2100 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2103 // Need to generate shuffle mask and extract:
2104 int prefslot_begin = -1, prefslot_end = -1;
2105 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte range per scalar width (the switch's case labels are
// elided in this view): i8 -> byte 3 only, i16 -> bytes 2..3,
// i32 -> bytes 0..3, i64 -> bytes 0..7.
2109 prefslot_begin = prefslot_end = 3;
2113 prefslot_begin = 2; prefslot_end = 3;
2117 prefslot_begin = 0; prefslot_end = 3;
2121 prefslot_begin = 0; prefslot_end = 7;
2126 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2127 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the SHUFB control: bytes at/before the preferred slot select the
// requested element's bytes; the remainder repeats the prefix pattern.
2129 for (int i = 0; i < 16; ++i) {
2130 // zero fill uppper part of preferred slot, don't care about the
2132 unsigned int mask_val;
2134 if (i <= prefslot_end) {
2136 ((i < prefslot_begin)
2138 : elt_byte + (i - prefslot_begin));
2140 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2142 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2145 SDOperand ShufMaskVec =
2146 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2148 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle the element into the preferred slot, then extract slot 0.
2150 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2151 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2152 N, N, ShufMaskVec));
// Lower INSERT_VECTOR_ELT with a constant index: broadcast the scalar with
// SCALAR_TO_VECTOR, then SHUFB it into place using an INSERT_MASK control
// word computed from an address that encodes the element offset.
2156 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2157 SDOperand VecOp = Op.getOperand(0);
2158 SDOperand ValOp = Op.getOperand(1);
2159 SDOperand IdxOp = Op.getOperand(2);
2160 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> asserts internally and never returns null, so the
// assert below can never fire; dyn_cast<> was presumably intended.
2162 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2163 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2165 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2166 // Use $2 because it's always 16-byte aligned and it's available:
2167 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// Result = SHUFB(scalar-as-vector, <elided operand>, INSERT_MASK(R2 + idx)).
// (The tail of this expression, including the element-size scaling of the
// constant, is elided in this view.)
2170 DAG.getNode(SPUISD::SHUFB, VT,
2171 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2173 DAG.getNode(SPUISD::INSERT_MASK, VT,
2174 DAG.getNode(ISD::ADD, PtrVT,
2176 DAG.getConstant(CN->getValue(),
// Lower i8 arithmetic that SPU cannot do natively: widen the operands to
// i16, perform the operation there, and truncate the result back to i8.
// The switch's case labels are elided in this view; each section below is
// one opcode's expansion, distinguished by its extension strategy.
2182 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2183 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2185 assert(Op.getValueType() == MVT::i8);
// default case: no expansion known for this opcode.
2188 assert(0 && "Unhandled i8 math operator");
2192 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2194 SDOperand N1 = Op.getOperand(1);
// Sign-extend both operands (constants are rebuilt directly as i16).
2195 N0 = (N0.getOpcode() != ISD::Constant
2196 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2197 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2198 N1 = (N1.getOpcode() != ISD::Constant
2199 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2200 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2201 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2202 DAG.getNode(Opc, MVT::i16, N0, N1));
// Next section (rotate, presumably — TODO confirm against the elided case
// label): replicate the byte into both halves of an i16 (N0 | N0 << 8) so a
// 16-bit rotate behaves like an 8-bit one.
2206 SDOperand N1 = Op.getOperand(1);
2208 N0 = (N0.getOpcode() != ISD::Constant
2209 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2210 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2211 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2212 N1 = (N1.getOpcode() != ISD::Constant
2213 ? DAG.getNode(N1Opc, MVT::i16, N1)
2214 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2215 SDOperand ExpandArg =
2216 DAG.getNode(ISD::OR, MVT::i16, N0,
2217 DAG.getNode(ISD::SHL, MVT::i16,
2218 N0, DAG.getConstant(8, MVT::i16)));
2219 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2220 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Next section: zero-extend both operands (logical shift semantics).
2224 SDOperand N1 = Op.getOperand(1);
2226 N0 = (N0.getOpcode() != ISD::Constant
2227 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2228 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2229 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2230 N1 = (N1.getOpcode() != ISD::Constant
2231 ? DAG.getNode(N1Opc, MVT::i16, N1)
2232 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2233 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2234 DAG.getNode(Opc, MVT::i16, N0, N1));
// Next section: sign-extend both operands (arithmetic semantics).
2237 SDOperand N1 = Op.getOperand(1);
2239 N0 = (N0.getOpcode() != ISD::Constant
2240 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2241 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2242 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2243 N1 = (N1.getOpcode() != ISD::Constant
2244 ? DAG.getNode(N1Opc, MVT::i16, N1)
2245 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2246 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2247 DAG.getNode(Opc, MVT::i16, N0, N1));
// Final section: identical sign-extending expansion for one more opcode.
2250 SDOperand N1 = Op.getOperand(1);
2252 N0 = (N0.getOpcode() != ISD::Constant
2253 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2254 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2255 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2256 N1 = (N1.getOpcode() != ISD::Constant
2257 ? DAG.getNode(N1Opc, MVT::i16, N1)
2258 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2259 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2260 DAG.getNode(Opc, MVT::i16, N0, N1));
2268 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of the (AND/OR/XOR-style) node is a constant splat
// BUILD_VECTOR — possibly hidden behind a BIT_CONVERT, on either side —
// rebuild it as a vector of i8 target constants so the byte-immediate
// instruction forms (ANDBI/ORBI/XORBI) can be selected.
2270 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2273 MVT::ValueType VT = Op.getValueType();
// First guess: constant on the left, value on the right.
2275 ConstVec = Op.getOperand(0);
2276 Arg = Op.getOperand(1);
2277 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2278 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2279 ConstVec = ConstVec.getOperand(0);
// Otherwise try the swapped assignment: constant on the right.
2281 ConstVec = Op.getOperand(1);
2282 Arg = Op.getOperand(0);
2283 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2284 ConstVec = ConstVec.getOperand(0);
2289 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2290 uint64_t VectorBits[2];
2291 uint64_t UndefBits[2];
2292 uint64_t SplatBits, SplatUndef;
// Only rewrite when the constant is a uniform splat of one element value.
2295 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2296 && isConstantSplat(VectorBits, UndefBits,
2297 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2298 SplatBits, SplatUndef, SplatSize)) {
2299 SDOperand tcVec[16];
2300 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2301 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2303 // Turn the BUILD_VECTOR into a set of target constants:
2304 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same opcode with the splat byte as 16 target constants.
2307 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2308 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2315 //! Lower i32 multiplication
// SPU has no full 32x32 multiply; compose it from 16-bit parts:
//   rA*rB = (MPYH(rA,rB) + MPYH(rB,rA)) + MPYU(rA,rB)
// i.e. the two cross high*low partial products plus the unsigned low*low
// product. Any other value type falls through to the error path below.
2316 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// default/error path (the switch structure around it is elided in this view):
2320 cerr << "CellSPU: Unknown LowerMUL value type, got "
2321 << MVT::getValueTypeString(Op.getValueType())
2327 SDOperand rA = Op.getOperand(0);
2328 SDOperand rB = Op.getOperand(1);
2330 return DAG.getNode(ISD::ADD, MVT::i32,
2331 DAG.getNode(ISD::ADD, MVT::i32,
2332 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2333 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2334 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2341 //! Custom lowering for CTPOP (count population)
2343 Custom lowering code that counts the number ones in the input
2344 operand. SPU has such an instruction, but it counts the number of
2345 ones per byte, which then have to be accumulated.
// Lower CTPOP using the SPU CNTB instruction (per-byte population count):
// promote the scalar into a vector, CNTB it, then accumulate the per-byte
// counts as needed for the scalar width. The switch's case labels (i8/i16/
// i32) are elided in this view; each section is one width's expansion.
2347 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2348 unsigned VT = Op.getValueType();
2349 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8: a single CNTB byte already is the answer — extract element 0.
2353 SDOperand N = Op.getOperand(0);
2354 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2356 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2357 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2359 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: add the two byte-counts ((x >> 8) + x) and mask to the low nibble
// range (max count is 16, so 0x0f masks correctly only up to 15 —
// NOTE(review): an all-ones i16 has popcount 16, which 0x0f would clip;
// confirm whether CNTB's per-byte max of 8 + 8 = 16 can reach the mask).
2363 MachineFunction &MF = DAG.getMachineFunction();
2364 SSARegMap *RegMap = MF.getSSARegMap();
2366 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2368 SDOperand N = Op.getOperand(0);
2369 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2370 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2371 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2373 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2374 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2376 // CNTB_result becomes the chain to which all of the virtual registers
2377 // CNTB_reg, SUM1_reg become associated:
2378 SDOperand CNTB_result =
2379 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2381 SDOperand CNTB_rescopy =
2382 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2384 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2386 return DAG.getNode(ISD::AND, MVT::i16,
2387 DAG.getNode(ISD::ADD, MVT::i16,
2388 DAG.getNode(ISD::SRL, MVT::i16,
// i32: two shift-and-add reduction steps fold the four byte-counts into the
// low byte: sum1 = x + (x>>16); result = (sum1 + (sum1>>8)) & 0xff.
2395 MachineFunction &MF = DAG.getMachineFunction();
2396 SSARegMap *RegMap = MF.getSSARegMap();
2398 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2399 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2401 SDOperand N = Op.getOperand(0);
2402 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2403 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2404 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2405 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2407 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2408 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2410 // CNTB_result becomes the chain to which all of the virtual registers
2411 // CNTB_reg, SUM1_reg become associated:
2412 SDOperand CNTB_result =
2413 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2415 SDOperand CNTB_rescopy =
2416 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2419 DAG.getNode(ISD::SRL, MVT::i32,
2420 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2423 DAG.getNode(ISD::ADD, MVT::i32,
2424 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2426 SDOperand Sum1_rescopy =
2427 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2430 DAG.getNode(ISD::SRL, MVT::i32,
2431 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2434 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2435 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2437 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2447 /// LowerOperation - Provide custom lowering hooks for some operations.
// Dispatch table for all operations this target marked Custom: route each
// opcode to its Lower* helper. Many case labels are elided in this view;
// the return statements identify which opcode each arm handles.
2450 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2452 switch (Op.getOpcode()) {
// default: diagnostic dump for an opcode nobody lowered yet.
2454 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2455 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2456 cerr << "*Op.Val:\n";
2463 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2465 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2466 case ISD::ConstantPool:
2467 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2468 case ISD::GlobalAddress:
2469 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2470 case ISD::JumpTable:
2471 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2473 return LowerConstant(Op, DAG);
2474 case ISD::ConstantFP:
2475 return LowerConstantFP(Op, DAG);
2476 case ISD::FORMAL_ARGUMENTS:
2477 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2479 return LowerCALL(Op, DAG);
2481 return LowerRET(Op, DAG, getTargetMachine());
// i8 arithmetic (case labels elided) funnels into LowerI8Math.
2490 return LowerI8Math(Op, DAG, Op.getOpcode());
2492 // Vector-related lowering.
2493 case ISD::BUILD_VECTOR:
2494 return LowerBUILD_VECTOR(Op, DAG);
2495 case ISD::SCALAR_TO_VECTOR:
2496 return LowerSCALAR_TO_VECTOR(Op, DAG);
2497 case ISD::VECTOR_SHUFFLE:
2498 return LowerVECTOR_SHUFFLE(Op, DAG);
2499 case ISD::EXTRACT_VECTOR_ELT:
2500 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2501 case ISD::INSERT_VECTOR_ELT:
2502 return LowerINSERT_VECTOR_ELT(Op, DAG);
2504 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2508 return LowerByteImmed(Op, DAG);
2510 // Vector and i8 multiply:
2512 if (MVT::isVector(Op.getValueType()))
2513 return LowerVectorMUL(Op, DAG);
2514 else if (Op.getValueType() == MVT::i8)
2515 return LowerI8Math(Op, DAG, Op.getOpcode());
2517 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only f32/v4f32 are supported (f64 lowering is still commented out).
2520 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2521 return LowerFDIVf32(Op, DAG);
2522 // else if (Op.getValueType() == MVT::f64)
2523 // return LowerFDIVf64(Op, DAG);
2525 assert(0 && "Calling FDIV on unsupported MVT");
2528 return LowerCTPOP(Op, DAG);
2534 //===----------------------------------------------------------------------===//
2535 // Other Lowering Code
2536 //===----------------------------------------------------------------------===//
// Hook for instructions marked usesCustomDAGSchedInserter. The body is
// elided in this view — presumably a stub or assert, since no SPU pseudo
// appears to need custom MBB insertion here; TODO confirm in the full file.
2539 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2540 MachineBasicBlock *BB)
2545 //===----------------------------------------------------------------------===//
2546 // Target Optimization Hooks
2547 //===----------------------------------------------------------------------===//
// Target DAG combine: fold trivial shift/rotate identities (0 << V -> 0,
// V << 0 -> V) for the SPU immediate shift/rotate node opcodes listed below.
2550 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2553 TargetMachine &TM = getTargetMachine();
2554 SelectionDAG &DAG = DCI.DAG;
2556 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2558 switch (N->getOpcode()) {
2561 // Look for obvious optimizations for shift left:
2562 // a) Replace 0 << V with 0
2563 // b) Replace V << 0 with V
2565 // N.B: llvm will generate an undef node if the shift amount is greater than
2566 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2569 case SPU::SHLQBIIvec:
2571 case SPU::ROTHIr16_i32:
2573 case SPU::ROTIr32_i16:
2574 case SPU::ROTQBYIvec:
2575 case SPU::ROTQBYBIvec:
2576 case SPU::ROTQBIIvec:
2577 case SPU::ROTHMIr16:
2579 case SPU::ROTQMBYIvec: {
// NOTE(review): cast<> never returns null, so these `if (… = cast<…>)`
// checks are always true; they are redundant (but harmless) after the
// preceding getOpcode() == ISD::Constant guards — dyn_cast<> reads clearer.
2580 if (N0.getOpcode() == ISD::Constant) {
2581 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2582 if (C->getValue() == 0) // 0 << V -> 0.
2586 SDOperand N1 = N->getOperand(1);
2587 if (N1.getOpcode() == ISD::Constant) {
2588 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2589 if (C->getValue() == 0) // V << 0 -> V
2600 //===----------------------------------------------------------------------===//
2601 // Inline Assembly Support
2602 //===----------------------------------------------------------------------===//
2604 /// getConstraintType - Given a constraint letter, return the type of
2605 /// constraint it is for this target.
// Classify single-letter inline-asm constraints: the letters handled by the
// (elided) case labels map to C_RegisterClass; anything else defers to the
// base TargetLowering implementation.
2606 SPUTargetLowering::ConstraintType
2607 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2608 if (ConstraintLetter.size() == 1) {
2609 switch (ConstraintLetter[0]) {
2616 return C_RegisterClass;
2619 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (plus the operand's value type)
// to a concrete SPU register class. The case labels and some type-dispatch
// conditions are elided in this view — e.g. the adjacent R64C/R32C returns
// are presumably guarded by an elided `if (VT == MVT::i64)`; TODO confirm.
2622 std::pair<unsigned, const TargetRegisterClass*>
2623 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2624 MVT::ValueType VT) const
2626 if (Constraint.size() == 1) {
2627 // GCC RS6000 Constraint Letters
2628 switch (Constraint[0]) {
2632 return std::make_pair(0U, SPU::R64CRegisterClass);
2633 return std::make_pair(0U, SPU::R32CRegisterClass);
2636 return std::make_pair(0U, SPU::R32FPRegisterClass);
2637 else if (VT == MVT::f64)
2638 return std::make_pair(0U, SPU::R64FPRegisterClass);
2641 return std::make_pair(0U, SPU::GPRCRegisterClass);
// Unrecognized constraint: fall back to the generic handler.
2645 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Report known-zero/known-one bits for SPU-specific nodes. The body is
// elided in this view — presumably it computes nothing yet (leaving the
// defaults) for the SPU custom opcodes; TODO confirm in the full file.
2649 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2651 uint64_t &KnownZero,
2653 const SelectionDAG &DAG,
2654 unsigned Depth ) const {
2659 // LowerAsmOperandForConstraint
// No SPU-specific inline-asm operand lowering yet: delegate everything to
// the generic TargetLowering implementation.
2661 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2662 char ConstraintLetter,
2663 std::vector<SDOperand> &Ops,
2664 SelectionDAG &DAG) {
2665 // Default, for the time being, to the base class handler
2666 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2669 /// isLegalAddressImmediate - Return true if the integer value can be used
2670 /// as the offset of the target addressing mode.
2671 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2672 // SPU's addresses are 256K:
// Accepts the open interval (-2^18, 2^18 - 1), i.e. [-262143, 262142].
// NOTE(review): both strict comparisons exclude the boundary values
// -(1<<18) and (1<<18)-1 — confirm whether `<=` was intended on the upper
// bound for a full signed-18-bit-style range.
2673 return (V > -(1 << 18) && V < (1 << 18) - 1);
2676 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {