1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by a team from the Computer Systems Research
6 // Department at The Aerospace Corporation and is distributed under the
7 // University of Illinois Open Source License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
11 // This file implements the SPUTargetLowering class.
13 //===----------------------------------------------------------------------===//
15 #include "SPURegisterNames.h"
16 #include "SPUISelLowering.h"
17 #include "SPUTargetMachine.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SSARegMap.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
// Global opcode -> printable-name table for SPUISD target nodes.
// NOTE(review): populated lazily inside getTargetNodeName() with no
// synchronization — presumably fine for single-threaded codegen, but
// confirm if this backend is ever used from multiple threads.
37 // Used in getTargetNodeName() below
39 std::map<unsigned, const char *> node_names;
41 //! MVT::ValueType mapping to useful data for Cell SPU
// Each entry pairs a value type with the byte offset of that type's
// "preferred slot" within a 16-byte SPU register. The rotation code in
// LowerLOAD uses prefslot_byte to land a scalar in its preferred slot.
42 struct valtype_map_s {
43 const MVT::ValueType valtype;
44 const int prefslot_byte;
47 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map; bound for the linear scan below.
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear-scan lookup of the valtype_map entry for VT.
// \param VT value type to look up
// \return pointer into valtype_map, or emits a diagnostic when VT has
//         no entry (returns the initial null value in that case).
60 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
// Diagnostic path: no table entry found for VT.
72 cerr << "getValueTypeMapEntry returns NULL for "
73 << MVT::getValueTypeString(VT)
82 //! Predicate that returns true if operand is a memory target
84 \arg Op Operand to test
85 \return true if the operand is a memory target (i.e., global
86 address, external symbol, constant pool) or an existing D-Form
89 bool isMemoryOperand(const SDOperand &Op)
// Pure opcode check: matches both the generic and the Target* variants
// of each address-producing node, plus the SPU-specific D-Form address.
91 const unsigned Opc = Op.getOpcode();
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::FrameIndex
95 || Opc == ISD::JumpTable
96 || Opc == ISD::ConstantPool
97 || Opc == ISD::ExternalSymbol
98 || Opc == ISD::TargetGlobalAddress
99 || Opc == ISD::TargetGlobalTLSAddress
100 || Opc == ISD::TargetFrameIndex
101 || Opc == ISD::TargetJumpTable
102 || Opc == ISD::TargetConstantPool
103 || Opc == ISD::TargetExternalSymbol
104 || Opc == SPUISD::DFormAddr);
// Constructor: declares to the SelectionDAG framework which operations
// the SPU supports natively (Legal), which must be rewritten in terms of
// other ops (Expand/Promote), and which get target-specific lowering
// (Custom) in LowerOperation and friends below.
108 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
109 : TargetLowering(TM),
112 // Fold away setcc operations if possible.
115 // Use _setjmp/_longjmp instead of setjmp/longjmp.
116 setUseUnderscoreSetJmp(true);
117 setUseUnderscoreLongJmp(true);
119 // Set up the SPU's register classes:
120 // NOTE: i8 register class is not registered because we cannot determine when
121 // we need to zero or sign extend for custom-lowered loads and stores.
122 // NOTE: Ignore the previous note. For now. :-)
123 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
124 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
125 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
126 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
127 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
128 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
129 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
131 // SPU has no sign or zero extended loads for i1, i8, i16:
132 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
133 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
134 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
135 setStoreXAction(MVT::i1, Custom);
137 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
140 setStoreXAction(MVT::i8, Custom);
142 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
144 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
146 // SPU constant load actions are custom lowered:
147 setOperationAction(ISD::Constant, MVT::i64, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
149 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
151 // SPU's loads and stores have to be custom lowered:
152 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
154 setOperationAction(ISD::LOAD, sctype, Custom);
155 setOperationAction(ISD::STORE, sctype, Custom);
158 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
159 // into BR_CCs. BR_CC instructions are custom selected in
161 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
163 // Expand the jumptable branches
164 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
165 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
166 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
168 // SPU has no intrinsics for these particular operations:
169 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
170 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
171 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
173 // SPU has no SREM/UREM instructions (expand to div/mul/sub sequences)
174 setOperationAction(ISD::SREM, MVT::i32, Expand);
175 setOperationAction(ISD::UREM, MVT::i32, Expand);
176 setOperationAction(ISD::SREM, MVT::i64, Expand);
177 setOperationAction(ISD::UREM, MVT::i64, Expand);
179 // We don't support sin/cos/sqrt/fmod
180 setOperationAction(ISD::FSIN , MVT::f64, Expand);
181 setOperationAction(ISD::FCOS , MVT::f64, Expand);
182 setOperationAction(ISD::FREM , MVT::f64, Expand);
183 setOperationAction(ISD::FSIN , MVT::f32, Expand);
184 setOperationAction(ISD::FCOS , MVT::f32, Expand);
185 setOperationAction(ISD::FREM , MVT::f32, Expand);
187 // FSQRT is expanded for both f32 and f64 (no hardware square root here)
188 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
189 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
191 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
192 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
194 // SPU can do rotate right and left, so legalize it... but customize for i8
195 // because instructions don't exist.
196 setOperationAction(ISD::ROTR, MVT::i32, Legal);
197 setOperationAction(ISD::ROTR, MVT::i16, Legal);
198 setOperationAction(ISD::ROTR, MVT::i8, Custom);
199 setOperationAction(ISD::ROTL, MVT::i32, Legal);
200 setOperationAction(ISD::ROTL, MVT::i16, Legal);
201 setOperationAction(ISD::ROTL, MVT::i8, Custom);
202 // SPU has no native version of shift left/right for i8
203 setOperationAction(ISD::SHL, MVT::i8, Custom);
204 setOperationAction(ISD::SRL, MVT::i8, Custom);
205 setOperationAction(ISD::SRA, MVT::i8, Custom);
207 // Custom lower i32 multiplications
208 setOperationAction(ISD::MUL, MVT::i32, Custom);
210 // Need to custom handle (some) common i8 math ops
211 setOperationAction(ISD::SUB, MVT::i8, Custom);
212 setOperationAction(ISD::MUL, MVT::i8, Custom);
214 // SPU does not have BSWAP. It does have i32 support CTLZ.
215 // CTPOP has to be custom lowered.
216 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
217 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
219 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
222 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
224 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
225 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
227 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
229 // SPU does not have select or setcc
230 setOperationAction(ISD::SELECT, MVT::i1, Expand);
231 setOperationAction(ISD::SELECT, MVT::i8, Expand);
232 setOperationAction(ISD::SELECT, MVT::i16, Expand);
233 setOperationAction(ISD::SELECT, MVT::i32, Expand);
234 setOperationAction(ISD::SELECT, MVT::i64, Expand);
235 setOperationAction(ISD::SELECT, MVT::f32, Expand);
236 setOperationAction(ISD::SELECT, MVT::f64, Expand);
238 setOperationAction(ISD::SETCC, MVT::i1, Expand);
239 setOperationAction(ISD::SETCC, MVT::i8, Expand);
240 setOperationAction(ISD::SETCC, MVT::i16, Expand);
241 setOperationAction(ISD::SETCC, MVT::i32, Expand);
242 setOperationAction(ISD::SETCC, MVT::i64, Expand);
243 setOperationAction(ISD::SETCC, MVT::f32, Expand);
244 setOperationAction(ISD::SETCC, MVT::f64, Expand);
246 // SPU has a legal FP -> signed INT instruction
247 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
248 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
250 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
252 // FDIV on SPU requires custom lowering
253 setOperationAction(ISD::FDIV, MVT::f32, Custom);
254 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
256 // SPU has [U|S]INT_TO_FP
257 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
259 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
262 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
264 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
266 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
267 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
268 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
269 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
271 // We cannot sextinreg(i1). Expand to shifts.
272 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
274 // Support label based line numbers.
275 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
276 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
278 // We want to legalize GlobalAddress and ConstantPool nodes into the
279 // appropriate instructions to materialize the address.
280 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
282 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
283 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
284 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
286 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
287 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
289 // RET must be custom lowered, to meet ABI requirements
290 setOperationAction(ISD::RET, MVT::Other, Custom);
292 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
293 setOperationAction(ISD::VASTART , MVT::Other, Custom);
295 // Use the default implementation.
296 setOperationAction(ISD::VAARG , MVT::Other, Expand);
297 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
298 setOperationAction(ISD::VAEND , MVT::Other, Expand);
299 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
300 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
302 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
304 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): these i64 FP_TO_SINT/SINT_TO_FP actions duplicate the
// Custom registrations made above — harmless, but one copy should go.
305 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
306 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
308 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier FP_TO_UINT/i32 Legal setting;
// the later call wins — confirm Promote is the intended final action.
309 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
311 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
312 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
314 // First set operation action for all vector types to expand. Then we
315 // will selectively turn on ones that can be effectively codegen'd.
316 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
321 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
323 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
324 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
325 // add/sub are legal for all supported vector VT's.
326 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
327 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
328 // mul has to be custom lowered.
329 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
331 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
336 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
338 // These operations need to be expanded:
339 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
343 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
345 // Custom lower build_vector, constant pool spills, insert and
346 // extract vector elements:
347 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
352 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8 overrides: these ops need custom handling for byte vectors,
// replacing the generic vector-loop settings above.
355 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
356 setOperationAction(ISD::AND, MVT::v16i8, Custom);
357 setOperationAction(ISD::OR, MVT::v16i8, Custom);
358 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
359 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
361 setSetCCResultType(MVT::i32);
362 setShiftAmountType(MVT::i32);
363 setSetCCResultContents(ZeroOrOneSetCCResult);
365 setStackPointerRegisterToSaveRestore(SPU::R1);
367 // We have target-specific dag combine patterns for the following nodes:
368 // e.g., setTargetDAGCombine(ISD::SUB);
370 computeRegisterProperties();
// Return a printable name for an SPUISD target node opcode, or null for
// unknown opcodes. The global node_names map is filled on first call.
// NOTE(review): the lazy initialization below is unsynchronized — safe
// only if codegen never calls this concurrently; confirm before reuse.
374 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
376 if (node_names.empty()) {
377 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
378 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
379 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
380 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
381 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
382 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
383 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
384 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
385 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
386 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
387 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
388 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
390 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
394 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
395 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
396 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
397 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
398 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
399 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
400 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
401 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
402 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
403 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
405 "SPUISD::ROTBYTES_RIGHT_Z";
406 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
407 "SPUISD::ROTBYTES_RIGHT_S";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
409 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
410 "SPUISD::ROTBYTES_LEFT_CHAINED";
411 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
412 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
413 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
414 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
415 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
416 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup; null (not a string) is returned for opcodes without an entry.
419 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
421 return ((i != node_names.end()) ? i->second : 0);
424 //===----------------------------------------------------------------------===//
425 // Calling convention code:
426 //===----------------------------------------------------------------------===//
428 #include "SPUGenCallingConv.inc"
430 //===----------------------------------------------------------------------===//
431 // LowerOperation implementation
432 //===----------------------------------------------------------------------===//
434 /// Custom lower loads for CellSPU
436 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
437 within a 16-byte block, we have to rotate to extract the requested element.
// Strategy visible below: load the enclosing 16-byte chunk, rotate the
// loaded vector so the requested element lands in its preferred slot,
// then extract element 0 (optionally sign/zero-extending i1/i8 values).
440 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
441 LoadSDNode *LN = cast<LoadSDNode>(Op);
442 SDOperand basep = LN->getBasePtr();
443 SDOperand the_chain = LN->getChain();
444 MVT::ValueType VT = LN->getLoadedVT();
445 MVT::ValueType OpVT = Op.Val->getValueType(0);
446 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
447 ISD::LoadExtType ExtType = LN->getExtensionType();
448 unsigned alignment = LN->getAlignment();
449 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
452 // For an extending load of an i1 variable, just call it i8 (or whatever we
453 // were passed) and make it zero-extended:
456 ExtType = ISD::ZEXTLOAD;
459 switch (LN->getAddressingMode()) {
460 case ISD::UNINDEXED: {
462 SDOperand rot_op, rotamt;
467 // The vector type we really want to be when we load the 16-byte chunk
468 MVT::ValueType vecVT, opVecVT;
471 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
475 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
// Fold a reg+constant base address into (base, offset); the low 4 bits
// of the offset become the byte-rotation amount within the chunk.
477 if (basep.getOpcode() == ISD::ADD) {
478 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
481 && "LowerLOAD: ISD::ADD operand 1 is not constant");
483 c_offset = (int) CN->getValue();
484 c_rotamt = (int) (c_offset & 0xf);
486 // Adjust the rotation amount to ensure that the final result ends up in
487 // the preferred slot:
488 c_rotamt -= vtm->prefslot_byte;
489 ptrp = basep.getOperand(0);
492 c_rotamt = -vtm->prefslot_byte;
496 if (alignment == 16) {
497 // 16-byte aligned load into preferred slot, no rotation
499 if (isMemoryOperand(ptrp))
503 // Return modified D-Form address for pointer:
504 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
505 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
507 return DAG.getLoad(VT, LN->getChain(), ptrp,
508 LN->getSrcValue(), LN->getSrcValueOffset(),
509 LN->isVolatile(), 16);
// Extending load variant of the aligned fast path.
511 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
512 LN->getSrcValueOffset(), OpVT,
513 LN->isVolatile(), 16);
519 // Realign the base pointer, with a D-Form address
520 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
521 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
522 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
// Load the whole 16-byte chunk as v16i8, then rotate it left so the
// wanted element moves into the preferred slot.
527 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
528 LN->getSrcValue(), LN->getSrcValueOffset(),
529 LN->isVolatile(), 16);
530 the_chain = rot_op.getValue(1);
531 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
533 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
538 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
539 the_chain = result.getValue(1);
541 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
546 scalarvts = DAG.getVTList(VT, MVT::Other);
548 scalarvts = DAG.getVTList(OpVT, MVT::Other);
// Reinterpret the rotated bytes as the element's vector type, then
// take element 0 (which is now the requested value).
551 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
555 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
556 the_chain = result.getValue(1);
558 // Handle the sign and zero-extending loads for i1 and i8:
561 if (ExtType == ISD::SEXTLOAD) {
562 NewOpC = (OpVT == MVT::i1
563 ? SPUISD::EXTRACT_I1_SEXT
564 : SPUISD::EXTRACT_I8_SEXT);
565 } else if (ExtType == ISD::ZEXTLOAD) {
566 NewOpC = (OpVT == MVT::i1
567 ? SPUISD::EXTRACT_I1_ZEXT
568 : SPUISD::EXTRACT_I8_ZEXT);
571 result = DAG.getNode(NewOpC, OpVT, result);
// Package (value, chain) in an LDRESULT node so callers see both.
574 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
575 SDOperand retops[2] = { result, the_chain };
577 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
582 // Misaligned 16-byte load:
583 if (basep.getOpcode() == ISD::LOAD) {
584 LN = cast<LoadSDNode>(basep);
585 if (LN->getAlignment() == 16) {
586 // We can verify that we're really loading from a 16-byte aligned
587 // chunk. Encapsulate basep as a D-Form address and return a new
589 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
590 DAG.getConstant(0, PtrVT));
592 return DAG.getLoad(VT, LN->getChain(), basep,
593 LN->getSrcValue(), LN->getSrcValueOffset(),
594 LN->isVolatile(), 16);
596 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
597 LN->getSrcValue(), LN->getSrcValueOffset(),
598 OpVT, LN->isVolatile(), 16);
602 // Catch all other cases where we can't guarantee that we have a
603 // 16-byte aligned entity, which means resorting to an X-form
606 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
607 SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
608 SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);
610 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
612 SDOperand alignLoad =
613 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
614 LN->getSrcValue(), LN->getSrcValueOffset(),
615 LN->isVolatile(), 16);
// Build a shuffle mask from the pointer and shuffle the loaded chunk
// so the addressed element becomes element 0.
617 SDOperand insertEltOp =
618 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
620 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
623 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
625 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
627 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
628 SDOperand retops[2] = { result, the_chain };
630 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
// Diagnostic path for unsupported (indexed) addressing modes.
639 case ISD::LAST_INDEXED_MODE:
640 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
642 cerr << (unsigned) LN->getAddressingMode() << "\n";
650 /// Custom lower stores for CellSPU
652 All CellSPU stores are aligned to 16-byte boundaries, so for elements
653 within a 16-byte block, we have to generate a shuffle to insert the
654 requested element into its place, then store the resulting block.
// Read-modify-write lowering: load the enclosing 16-byte chunk, shuffle
// the scalar value into the correct byte position, store the chunk back.
657 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
658 StoreSDNode *SN = cast<StoreSDNode>(Op);
659 SDOperand Value = SN->getValue();
660 MVT::ValueType VT = Value.getValueType();
661 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
662 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
663 SDOperand the_chain = SN->getChain();
664 //unsigned alignment = SN->getAlignment();
665 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
667 switch (SN->getAddressingMode()) {
668 case ISD::UNINDEXED: {
669 SDOperand basep = SN->getBasePtr();
// Fold a reg+constant base into (pointer, byte offset) as in LowerLOAD.
673 if (basep.getOpcode() == ISD::ADD) {
674 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
676 && "LowerSTORE: ISD::ADD operand 1 is not constant");
677 offset = unsigned(CN->getValue());
678 ptrOp = basep.getOperand(0);
679 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
687 // The vector type we really want to load from the 16-byte chunk, except
688 // in the case of MVT::i1, which has to be v16i8.
689 unsigned vecVT, stVecVT;
692 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
694 stVecVT = MVT::v16i8;
695 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
697 // Realign the pointer as a D-Form address (ptrOp is the pointer,
698 // to force a register load with the address; basep is the actual
699 // dform addr offs($reg).
700 ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
701 DAG.getConstant(0, PtrVT));
702 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
703 ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));
705 // Create the 16-byte aligned vector load
706 SDOperand alignLoad =
707 DAG.getLoad(vecVT, the_chain, basep,
708 SN->getSrcValue(), SN->getSrcValueOffset(),
709 SN->isVolatile(), 16);
710 the_chain = alignLoad.getValue(1);
712 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
713 SDOperand theValue = SN->getValue();
717 && (theValue.getOpcode() == ISD::AssertZext
718 || theValue.getOpcode() == ISD::AssertSext)) {
719 // Drill down and get the value for zero- and sign-extended
721 theValue = theValue.getOperand(0);
// Build the insertion mask from the low offset bits, then merge the
// scalar into the loaded chunk with a shuffle.
724 SDOperand insertEltOp =
725 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
726 DAG.getNode(SPUISD::DFormAddr, PtrVT,
728 DAG.getConstant((offset & 0xf), PtrVT)));
730 result = DAG.getNode(SPUISD::SHUFB, vecVT,
731 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
733 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
// Store the merged chunk back to the aligned address.
735 result = DAG.getStore(the_chain, result, basep,
736 LN->getSrcValue(), LN->getSrcValueOffset(),
737 LN->isVolatile(), LN->getAlignment());
// Diagnostic path for unsupported (indexed) addressing modes.
// NOTE(review): the message below says "LowerLOAD"/"LoadSDNode" but this
// is LowerSTORE operating on a StoreSDNode — copy-paste error in the
// diagnostic string; fix the literal when code changes are permitted.
746 case ISD::LAST_INDEXED_MODE:
747 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
749 cerr << (unsigned) SN->getAddressingMode() << "\n";
757 /// Generate the address of a constant pool entry.
// Static relocation only: small-memory model returns the target constant
// pool node directly; large-memory model materializes the address as a
// Hi/Lo pair summed together. Other relocation models are rejected.
759 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
760 MVT::ValueType PtrVT = Op.getValueType();
761 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
762 Constant *C = CP->getConstVal();
763 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
764 const TargetMachine &TM = DAG.getTarget();
765 SDOperand Zero = DAG.getConstant(0, PtrVT);
767 if (TM.getRelocationModel() == Reloc::Static) {
768 if (!ST->usingLargeMem()) {
769 // Just return the SDOperand with the constant pool address in it.
772 // Generate hi/lo address pair
773 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
774 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
776 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
781 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry. Mirrors LowerConstantPool:
// static relocation only; small-memory model returns the target jump
// table node, large-memory model builds a Hi/Lo address pair.
786 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
787 MVT::ValueType PtrVT = Op.getValueType();
788 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
789 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
790 SDOperand Zero = DAG.getConstant(0, PtrVT);
791 const TargetMachine &TM = DAG.getTarget();
793 if (TM.getRelocationModel() == Reloc::Static) {
794 if (!ST->usingLargeMem()) {
795 // Just return the SDOperand with the jump table address in it.
798 // Generate hi/lo address pair
799 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
800 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
802 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
807 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global variable. Same pattern as the
// constant pool / jump table lowerings: static relocation only, with a
// Hi/Lo pair for the large-memory model; other models emit an error.
812 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
813 MVT::ValueType PtrVT = Op.getValueType();
814 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
815 GlobalValue *GV = GSDN->getGlobal();
816 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
817 SDOperand Zero = DAG.getConstant(0, PtrVT);
818 const TargetMachine &TM = DAG.getTarget();
820 if (TM.getRelocationModel() == Reloc::Static) {
821 if (!ST->usingLargeMem()) {
822 // Generate a local store address
825 // Generate hi/lo address pair
826 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
827 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
829 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
832 cerr << "LowerGlobalAddress: Relocation model other than static not "
841 //! Custom lower i64 integer constants
843 This code inserts all of the necessary juggling that needs to occur to load
844 a 64-bit constant into a register.
// Only MVT::i64 is handled: the constant is splatted into a v2i64
// BUILD_VECTOR and element 0 is extracted. Other types hit the
// diagnostic path below.
847 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
848 unsigned VT = Op.getValueType();
849 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
851 if (VT == MVT::i64) {
852 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
853 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
854 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
857 cerr << "LowerConstant: unhandled constant type "
858 << MVT::getValueTypeString(VT)
867 //! Custom lower single precision floating point constants
869 "float" immediates can be lowered as if they were unsigned 32-bit integers.
870 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
// f32: wrap the immediate in an SFPConstant pseudo-instruction.
// f64: reinterpret the bits as an i64 constant (via LowerConstant) and
// bit-convert the result back to f64.
874 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
875 unsigned VT = Op.getValueType();
876 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
879 "LowerConstantFP: Node is not ConstantFPSDNode");
881 const APFloat &apf = FP->getValueAPF();
883 if (VT == MVT::f32) {
884 return DAG.getNode(SPUISD::SFPConstant, VT,
885 DAG.getTargetConstantFP(apf.convertToFloat(), VT));
886 } else if (VT == MVT::f64) {
887 uint64_t dbits = DoubleToBits(apf.convertToDouble());
888 return DAG.getNode(ISD::BIT_CONVERT, VT,
889 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
//! Lower ISD::FORMAL_ARGUMENTS for the Cell SPU target.
/*!
  \arg Op  The FORMAL_ARGUMENTS node; operand 0 is the input chain, operand 2
           encodes whether the function is varargs.
  \arg DAG The SelectionDAG being constructed.
  \arg VarArgsFrameIndex [out] Receives the frame index of the first vararg
           stack slot (for llvm.va_start expansion).
  \return  A MERGE_VALUES node carrying one value per formal argument plus
           the (possibly updated) chain as the last result.

  While argument registers remain (SPURegisterInfo::getArgRegs()) and the
  function is not varargs, each argument value is produced via CopyFromReg
  from a freshly created virtual register of the matching register class.
  Otherwise the argument is loaded from a fixed stack slot; slots advance by
  SPUFrameInfo::stackSlotSize() bytes starting at minStackSize().

  NOTE(review): this listing is elided (original line numbers are
  non-contiguous), so the switch-on-ObjectVT case labels, some braces, and
  the else-branches that set needsLoad are not visible here.
*/
896 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
898 MachineFunction &MF = DAG.getMachineFunction();
899 MachineFrameInfo *MFI = MF.getFrameInfo();
900 SSARegMap *RegMap = MF.getSSARegMap();
901 SmallVector<SDOperand, 8> ArgValues;
902 SDOperand Root = Op.getOperand(0);
903 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
905 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
906 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
908 unsigned ArgOffset = SPUFrameInfo::minStackSize();
909 unsigned ArgRegIdx = 0;
910 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
912 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
914 // Add DAG nodes to load the arguments or copy them out of registers.
915 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
917 bool needsLoad = false;
918 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
919 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
923 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
924 << MVT::getValueTypeString(ObjectVT)
// i8 argument in an R8C register. Varargs functions deliberately skip the
// register path (the !isVarArg guard) so their arguments land on the stack.
929 if (!isVarArg && ArgRegIdx < NumArgRegs) {
930 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
931 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
932 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 argument in an R16C register.
939 if (!isVarArg && ArgRegIdx < NumArgRegs) {
940 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
941 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
942 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 argument in an R32C register.
949 if (!isVarArg && ArgRegIdx < NumArgRegs) {
950 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
951 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
952 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 argument in an R64C register.
959 if (!isVarArg && ArgRegIdx < NumArgRegs) {
960 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
961 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
962 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 argument in an R32FP register.
969 if (!isVarArg && ArgRegIdx < NumArgRegs) {
970 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
971 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
972 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 argument in an R64FP register.
979 if (!isVarArg && ArgRegIdx < NumArgRegs) {
980 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
981 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
982 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector argument in a VECREG register, kept at its vector type.
993 if (!isVarArg && ArgRegIdx < NumArgRegs) {
994 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
995 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
996 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1004 // We need to load the argument to a virtual register if we determined above
1005 // that we ran out of physical registers of the appropriate type
1007 // If the argument is actually used, emit a load from the right stack
1009 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1010 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1011 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1012 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1014 // Don't emit a dead load.
1015 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1018 ArgOffset += StackSlotSize;
1021 ArgValues.push_back(ArgVal);
1024 // If the function takes variable number of arguments, make a frame index for
1025 // the start of the first vararg value... for expansion of llvm.va_start.
1027 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1029 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1030 // If this function is vararg, store any remaining integer argument regs to
1031 // their spots on the stack so that they may be loaded by dereferencing the
1032 // result of va_next.
1033 SmallVector<SDOperand, 8> MemOps;
1034 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1035 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1036 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1037 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1038 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1039 MemOps.push_back(Store);
1040 // Increment the address by four for the next argument to store
1041 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1042 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all vararg spill stores together so they stay ordered on the chain.
1044 if (!MemOps.empty())
1045 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1048 ArgValues.push_back(Root)
1050 // Return the new list of results.
1051 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1052 Op.Val->value_end());
1053 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1056 /// isLSAAddress - Return the immediate to use if the specified
1057 /// value is representable as a LSA address.
/// \arg Op  Candidate callee/address operand.
/// \arg DAG SelectionDAG used to materialize the resulting constant.
/// \return  An i32 constant node holding the word-address (byte address >> 2)
///          if Op is a suitably aligned, 16-bit-representable constant,
///          otherwise 0 (null).
1058 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1059 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// Bug fix: dyn_cast returns null for non-constants; guard before using C.
if (!C) return 0;
1062 int Addr = C->getValue();
1063 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1064 (Addr << 14 >> 14) != Addr)
1065 return 0; // Top 14 bits have to be sext of immediate.
// Drop the implicit zero low bits to form the word address.
1067 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
//! Lower ISD::CALL for the Cell SPU target.
/*!
  \arg Op  The CALL node: operand 0 = chain, operand 2 = isVarArg flag,
           operand 3 = isTailCall flag, operand 4 = callee, operands 5.. =
           (argument, flag) pairs.
  \arg DAG The SelectionDAG being constructed.
  \return  For void calls, the chain; otherwise a MERGE_VALUES node of the
           copied-out return value(s) and the chain (selected by Op.ResNo).

  Arguments are assigned to SPU argument registers while any remain;
  overflow arguments are stored to the stack below the linkage area at
  StackSlotSize strides. Return values are copied out of R3 (and R4 for the
  two-register i32/i32 case).

  NOTE(review): this listing is elided (original line numbers are
  non-contiguous); switch case labels, some else-branches, and declarations
  such as InFlag are not visible here. isTailCall is computed but no use of
  it is visible in this listing — presumably tail calls are not optimized;
  verify against the full file.
*/
1072 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1073 SDOperand Chain = Op.getOperand(0);
1075 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1076 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1078 SDOperand Callee = Op.getOperand(4);
1079 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1080 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1081 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1082 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1084 // Handy pointer type
1085 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1087 // Accumulate how many bytes are to be pushed on the stack, including the
1088 // linkage area, and parameter passing area. According to the SPU ABI,
1089 // we minimally need space for [LR] and [SP]
1090 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1092 // Set up a copy of the stack pointer for use loading and storing any
1093 // arguments that may not fit in the registers available for argument
1095 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1097 // Figure out which arguments are going to go in registers, and which in
1099 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1100 unsigned ArgRegIdx = 0;
1102 // Keep track of registers passing arguments
1103 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1104 // And the arguments passed on the stack
1105 SmallVector<SDOperand, 8> MemOpChains;
1107 for (unsigned i = 0; i != NumOps; ++i) {
1108 SDOperand Arg = Op.getOperand(5+2*i);
1110 // PtrOff will be used to store the current argument to the stack if a
1111 // register cannot be found for it.
1112 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1113 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1115 switch (Arg.getValueType()) {
1116 default: assert(0 && "Unexpected ValueType for argument!");
// Register if available, else spill to the current stack slot.
1120 if (ArgRegIdx != NumArgRegs) {
1121 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1123 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1124 ArgOffset += StackSlotSize;
1129 if (ArgRegIdx != NumArgRegs) {
1130 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1132 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1133 ArgOffset += StackSlotSize;
1140 if (ArgRegIdx != NumArgRegs) {
1141 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1143 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1144 ArgOffset += StackSlotSize;
1150 // Update number of stack bytes actually used, insert a call sequence start
1151 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1152 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1154 if (!MemOpChains.empty()) {
1155 // Adjust the stack pointer for the stack arguments.
1156 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1157 &MemOpChains[0], MemOpChains.size());
1160 // Build a sequence of copy-to-reg nodes chained together with token chain
1161 // and flag operands which copy the outgoing args into the appropriate regs.
1163 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1164 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1166 InFlag = Chain.getValue(1);
1169 std::vector<MVT::ValueType> NodeTys;
1170 NodeTys.push_back(MVT::Other); // Returns a chain
1171 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1173 SmallVector<SDOperand, 8> Ops;
1174 unsigned CallOpc = SPUISD::CALL;
1176 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1177 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1178 // node so that legalize doesn't hack it.
1179 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1180 GlobalValue *GV = G->getGlobal();
1181 unsigned CalleeVT = Callee.getValueType();
1183 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1184 // style calls, otherwise, external symbols are BRASL calls.
1186 // This may be an unsafe assumption for JIT and really large compilation
1188 if (GV->isDeclaration()) {
1189 Callee = DAG.getGlobalAddress(GV, CalleeVT);
1191 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1192 DAG.getTargetGlobalAddress(GV, CalleeVT),
1193 DAG.getConstant(0, PtrVT));
1195 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1196 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1197 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1198 // If this is an absolute destination address that appears to be a legal
1199 // local store address, use the munged value.
1200 Callee = SDOperand(Dest, 0);
1202 Ops.push_back(Chain);
1203 Ops.push_back(Callee);
1205 // Add argument registers to the end of the list so that they are known live
1207 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1208 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1209 RegsToPass[i].second.getValueType()));
1212 Ops.push_back(InFlag);
1213 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1214 InFlag = Chain.getValue(1);
1216 SDOperand ResultVals[3];
1217 unsigned NumResults = 0;
1220 // If the call has results, copy the values out of the ret val registers.
1221 switch (Op.Val->getValueType(0)) {
1222 default: assert(0 && "Unexpected ret value!");
1223 case MVT::Other: break;
// Two i32 results come back in R4 then R3; a single i32 uses R3 alone.
1225 if (Op.Val->getValueType(1) == MVT::i32) {
1226 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1227 ResultVals[0] = Chain.getValue(0);
1228 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1229 Chain.getValue(2)).getValue(1);
1230 ResultVals[1] = Chain.getValue(0);
1232 NodeTys.push_back(MVT::i32);
1234 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1235 ResultVals[0] = Chain.getValue(0);
1238 NodeTys.push_back(MVT::i32);
1241 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1242 ResultVals[0] = Chain.getValue(0);
1244 NodeTys.push_back(MVT::i64);
1248 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1249 InFlag).getValue(1);
1250 ResultVals[0] = Chain.getValue(0);
1252 NodeTys.push_back(Op.Val->getValueType(0));
1259 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1260 InFlag).getValue(1);
1261 ResultVals[0] = Chain.getValue(0);
1263 NodeTys.push_back(Op.Val->getValueType(0));
1267 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1268 DAG.getConstant(NumStackBytes, PtrVT));
1269 NodeTys.push_back(MVT::Other);
1271 // If the function returns void, just return the chain.
1272 if (NumResults == 0)
1275 // Otherwise, merge everything together with a MERGE_VALUES node.
1276 ResultVals[NumResults++] = Chain;
1277 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1278 ResultVals, NumResults);
1279 return Res.getValue(Op.ResNo);
//! Lower ISD::RET for the Cell SPU target.
/*!
  \arg Op  The RET node; operand 0 is the chain, odd operands are values.
  \arg DAG The SelectionDAG being constructed.
  \arg TM  Target machine, used to build the CCState.
  \return  An SPUISD::RET_FLAG node, with a flag operand when values were
           copied into the return registers chosen by RetCC_SPU.
  NOTE(review): the declaration of Flag and the branch selecting between the
  two RET_FLAG forms are elided in this listing.
*/
1283 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1284 SmallVector<CCValAssign, 16> RVLocs;
1285 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1286 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1287 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1288 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1290 // If this is the first return lowered for this function, add the regs to the
1291 // liveout set for the function.
1292 if (DAG.getMachineFunction().liveout_empty()) {
1293 for (unsigned i = 0; i != RVLocs.size(); ++i)
1294 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1297 SDOperand Chain = Op.getOperand(0);
1300 // Copy the result values into the output registers.
1301 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1302 CCValAssign &VA = RVLocs[i];
1303 assert(VA.isRegLoc() && "Can only return in registers!");
1304 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1305 Flag = Chain.getValue(1);
1309 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1311 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1315 //===----------------------------------------------------------------------===//
1316 // Vector related lowering:
1317 //===----------------------------------------------------------------------===//
//! Return the splatted ConstantSDNode of a BUILD_VECTOR, if any.
/*!
  \arg N  A BUILD_VECTOR node.
  \return The ConstantSDNode all non-undef elements share, or 0 when the
          elements differ, are non-constant, or are all undef.
  NOTE(review): the guard that assigns OpVal only on its first non-undef
  element and the early-return paths are elided in this listing.
*/
1319 static ConstantSDNode *
1320 getVecImm(SDNode *N) {
1321 SDOperand OpVal(0, 0);
1323 // Check to see if this buildvec has a single non-undef value in its elements.
1324 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1325 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1327 OpVal = N->getOperand(i);
1328 else if (OpVal != N->getOperand(i))
1332 if (OpVal.Val != 0) {
1333 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1338 return 0; // All UNDEF: use implicit def.; not Constant node
1341 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1342 /// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant of type ValueType; otherwise an empty SDOperand (elided here).
1344 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1345 MVT::ValueType ValueType) {
1346 if (ConstantSDNode *CN = getVecImm(N)) {
1347 uint64_t Value = CN->getValue();
// 0x3ffff == 2^18 - 1: the largest unsigned 18-bit immediate.
1348 if (Value <= 0x3ffff)
1349 return DAG.getConstant(Value, ValueType);
1355 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1356 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant of type ValueType; otherwise an empty SDOperand (elided here).
/// The sign-extension round trip below checks that truncating the splat to
/// 16 bits and sign-extending it back reproduces the original value.
1358 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1359 MVT::ValueType ValueType) {
1360 if (ConstantSDNode *CN = getVecImm(N)) {
1361 if (ValueType == MVT::i32) {
1362 int Value = (int) CN->getValue();
1363 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1365 if (Value == SExtValue)
1366 return DAG.getConstant(Value, ValueType);
1367 } else if (ValueType == MVT::i16) {
1368 short Value = (short) CN->getValue();
1369 int SExtValue = ((int) Value << 16) >> 16;
1371 if (Value == (short) SExtValue)
1372 return DAG.getConstant(Value, ValueType);
1373 } else if (ValueType == MVT::i64) {
1374 int64_t Value = CN->getValue();
1375 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1377 if (Value == SExtValue)
1378 return DAG.getConstant(Value, ValueType);
1385 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1386 /// and the value fits into a signed 10-bit constant, and if so, return the
/// constant of type ValueType; otherwise an empty SDOperand (elided here).
1388 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1389 MVT::ValueType ValueType) {
1390 if (ConstantSDNode *CN = getVecImm(N)) {
1391 int Value = (int) CN->getValue();
// isS10Constant: true iff the value fits in a signed 10-bit immediate.
1392 if ((ValueType == MVT::i32 && isS10Constant(Value))
1393 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1394 return DAG.getConstant(Value, ValueType);
1400 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1401 /// and the value fits into a signed 8-bit constant, and if so, return the
/// constant of type ValueType; otherwise an empty SDOperand (elided here).
1404 /// @note: The incoming vector is v16i8 because that's the only way we can load
1405 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same (a byte splat viewed as 16-bit lanes).
1407 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1408 MVT::ValueType ValueType) {
1409 if (ConstantSDNode *CN = getVecImm(N)) {
1410 int Value = (int) CN->getValue();
// For i16 lanes: high byte of the truncated 16-bit value must equal its
// low byte, i.e. the lane is the same byte replicated.
1411 if (ValueType == MVT::i16
1412 && Value <= 0xffff /* truncated from uint64_t */
1413 && ((short) Value >> 8) == ((short) Value & 0xff))
1414 return DAG.getConstant(Value & 0xff, ValueType);
1415 else if (ValueType == MVT::i8
1416 && (Value & 0xff) == Value)
1417 return DAG.getConstant(Value, ValueType);
1423 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1424 /// and the value fits into a signed 16-bit constant, and if so, return the
/// upper halfword (Value >> 16) suitable for ILHU; otherwise an empty
/// SDOperand (elided here).
1426 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1427 MVT::ValueType ValueType) {
1428 if (ConstantSDNode *CN = getVecImm(N)) {
1429 uint64_t Value = CN->getValue();
// Accept values whose low 16 bits are zero. NOTE(review): for i64 the
// 0xffff0000 mask is applied to the full 64-bit value, so this also
// requires the upper 32 bits to be zero — confirm that is intended.
1430 if ((ValueType == MVT::i32
1431 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1432 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1433 return DAG.getConstant(Value >> 16, ValueType);
1439 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
/// Returns the splat value as an i32 constant, or (elided here) an empty
/// SDOperand when the vector is not a uniform constant.
1440 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1441 if (ConstantSDNode *CN = getVecImm(N)) {
1442 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1448 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
/// Returns the splat value as an i64 constant, or (elided here) an empty
/// SDOperand when the vector is not a uniform constant.
/// NOTE(review): the (unsigned) cast truncates the splat to 32 bits before
/// widening to i64 — verify this is intended for full 64-bit splat values.
1449 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1450 if (ConstantSDNode *CN = getVecImm(N)) {
1451 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1457 // If this is a vector of constants or undefs, get the bits. A bit in
1458 // UndefBits is set if the corresponding element of the vector is an
1459 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1460 // zero. Return true if this is not an array of constants, false if it is.
//
// \arg BV         BUILD_VECTOR node to flatten.
// \arg VectorBits [out] the 128-bit vector value as two uint64_t words.
// \arg UndefBits  [out] mask of bit positions coming from UNDEF elements.
// NOTE(review): the loop's `continue` after recording undef bits and the
// `return true` for the nonconstant-element case are elided in this listing.
1462 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1463 uint64_t UndefBits[2]) {
1464 // Start with zero'd results.
1465 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1467 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1468 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1469 SDOperand OpVal = BV->getOperand(i);
1471 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1472 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1474 uint64_t EltBits = 0;
1475 if (OpVal.getOpcode() == ISD::UNDEF) {
1476 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1477 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1479 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1480 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1481 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements are recorded by their raw bit pattern.
1482 const APFloat &apf = CN->getValueAPF();
1483 EltBits = (CN->getValueType(0) == MVT::f32
1484 ? FloatToBits(apf.convertToFloat())
1485 : DoubleToBits(apf.convertToDouble()));
1487 // Nonconstant element.
1491 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1494 //printf("%llx %llx %llx %llx\n",
1495 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1499 /// If this is a splat (repetition) of a value across the whole vector, return
1500 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1501 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1502 /// SplatSize = 1 byte.
/// \arg Bits128/Undef128  128-bit value and undef mask from
///                        GetConstantBuildVectorBits.
/// \arg SplatBits/SplatUndef [out] the splatted value and its undef bits.
/// \return true iff the vector is a constant splat at a width >= the
///         (elided) MinSplatBits parameter; SplatSize-setting lines and
///         some assignments are elided in this listing.
1503 static bool isConstantSplat(const uint64_t Bits128[2],
1504 const uint64_t Undef128[2],
1506 uint64_t &SplatBits, uint64_t &SplatUndef,
1508 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1509 // the same as the lower 64-bits, ignoring undefs.
1510 uint64_t Bits64 = Bits128[0] | Bits128[1];
1511 uint64_t Undef64 = Undef128[0] & Undef128[1];
1512 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1513 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1514 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1515 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1517 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1518 if (MinSplatBits < 64) {
1520 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1522 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1523 if (MinSplatBits < 32) {
1525 // If the top 16-bits are different than the lower 16-bits, ignoring
1526 // undefs, we have an i32 splat.
1527 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1528 if (MinSplatBits < 16) {
1529 // If the top 8-bits are different than the lower 8-bits, ignoring
1530 // undefs, we have an i16 splat.
1531 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1532 // Otherwise, we have an 8-bit splat.
1533 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1534 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1540 SplatUndef = Undef16;
1547 SplatUndef = Undef32;
1553 SplatBits = Bits128[0];
1554 SplatUndef = Undef128[0];
1560 return false; // Can't be a splat if two pieces don't match.
1563 // If this is a case we can't handle, return null and let the default
1564 // expansion code take care of it. If we CAN select this case, and if it
1565 // selects to a single instruction, return Op. Otherwise, if we can codegen
1566 // this case more efficiently than a constant pool load, lower it to the
1567 // sequence of ops that should be used.
//
// NOTE(review): this listing elides lines (non-contiguous numbering): the
// switch on VT, its case labels, the SplatSize declaration, and several
// closing braces are not visible here. Code kept byte-identical.
1568 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1569 MVT::ValueType VT = Op.getValueType();
1570 // If this is a vector of constants or undefs, get the bits. A bit in
1571 // UndefBits is set if the corresponding element of the vector is an
1572 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1574 uint64_t VectorBits[2];
1575 uint64_t UndefBits[2];
1576 uint64_t SplatBits, SplatUndef;
1578 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1579 || !isConstantSplat(VectorBits, UndefBits,
1580 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1581 SplatBits, SplatUndef, SplatSize))
1582 return SDOperand(); // Not a constant vector, not a splat.
// v4f32 splat: rebuild as a v4i32 integer splat and bitcast back.
1587 uint32_t Value32 = SplatBits;
1588 assert(SplatSize == 4
1589 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1590 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1591 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1592 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1593 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 splat: same trick via v2i64.
1597 uint64_t f64val = SplatBits;
1598 assert(SplatSize == 8
1599 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1600 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1601 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1602 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1603 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1607 // 8-bit constants have to be expanded to 16-bits
1608 unsigned short Value16 = SplatBits | (SplatBits << 8);
1610 for (int i = 0; i < 8; ++i)
1611 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1612 return DAG.getNode(ISD::BIT_CONVERT, VT,
1613 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// 16-bit lanes: replicate an 8-bit splat into both bytes if needed.
1616 unsigned short Value16;
1618 Value16 = (unsigned short) (SplatBits & 0xffff);
1620 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1621 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1623 for (int i = 0; i < 8; ++i) Ops[i] = T;
1624 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1627 unsigned int Value = SplatBits;
1628 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1629 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 splat: split into 32-bit halves and synthesize via SHUFB when a
// half is a "special" pattern (0, all-ones, or sign-bit only).
1632 uint64_t val = SplatBits;
1633 uint32_t upper = uint32_t(val >> 32);
1634 uint32_t lower = uint32_t(val);
1639 SmallVector<SDOperand, 16> ShufBytes;
1641 bool upper_special, lower_special;
1643 // NOTE: This code creates common-case shuffle masks that can be easily
1644 // detected as common expressions. It is not attempting to create highly
1645 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1647 // Detect if the upper or lower half is a special shuffle mask pattern:
1648 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1649 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1651 // Create lower vector if not a special pattern
1652 if (!lower_special) {
1653 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1654 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1655 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1656 LO32C, LO32C, LO32C, LO32C));
1659 // Create upper vector if not a special pattern
1660 if (!upper_special) {
1661 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1662 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1663 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1664 HI32C, HI32C, HI32C, HI32C));
1667 // If either upper or lower are special, then the two input operands are
1668 // the same (basically, one of them is a "don't care")
1673 if (lower_special && upper_special) {
1674 // Unhappy situation... both upper and lower are special, so punt with
1675 // a target constant:
1676 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1677 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control mask. Special halves use the shufb
// magic selector bytes (0x80 = 0x00, 0xc0 = 0xff, 0xe0 = 0x80).
1681 for (int i = 0; i < 4; ++i) {
1682 for (int j = 0; j < 4; ++j) {
1684 bool process_upper, process_lower;
1687 process_upper = (upper_special && (i & 1) == 0);
1688 process_lower = (lower_special && (i & 1) == 1);
1690 if (process_upper || process_lower) {
1691 if ((process_upper && upper == 0)
1692 || (process_lower && lower == 0))
1694 else if ((process_upper && upper == 0xffffffff)
1695 || (process_lower && lower == 0xffffffff))
1697 else if ((process_upper && upper == 0x80000000)
1698 || (process_lower && lower == 0x80000000))
1699 val = (j == 0 ? 0xe0 : 0x80);
1701 val = i * 4 + j + ((i & 1) * 16);
1703 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1707 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1708 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1709 &ShufBytes[0], ShufBytes.size()));
1711 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1712 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1713 return DAG.getNode(ISD::BIT_CONVERT, VT,
1714 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1715 Zero, Zero, Zero, Zero));
1723 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1724 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1725 /// permutation vector, V3, is monotonically increasing with one "exception"
1726 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1727 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1728 /// In either case, the net result is going to eventually invoke SHUFB to
1729 /// permute/shuffle the bytes from V1 and V2.
1731 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1732 /// control word for byte/halfword/word insertion. This takes care of a single
1733 /// element move from V2 into V1.
1735 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
///
/// NOTE(review): the declarations of SrcElt/V2Elt, the V2EltIdx0 assignments
/// per element type, and some loop-body braces are elided in this listing.
1736 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1737 SDOperand V1 = Op.getOperand(0);
1738 SDOperand V2 = Op.getOperand(1);
1739 SDOperand PermMask = Op.getOperand(2);
1741 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1743 // If we have a single element being moved from V1 to V2, this can be handled
1744 // using the C*[DX] compute mask instructions, but the vector elements have
1745 // to be monotonically increasing with one exception element.
1746 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1747 unsigned EltsFromV2 = 0;
1749 unsigned V2EltIdx0 = 0;
1750 unsigned CurrElt = 0;
1751 bool monotonic = true;
1752 if (EltVT == MVT::i8)
1754 else if (EltVT == MVT::i16)
1756 else if (EltVT == MVT::i32)
1759 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: stop early once more than one element comes from V2 or
// the V1 indices stop being monotonic.
1761 for (unsigned i = 0, e = PermMask.getNumOperands();
1762 EltsFromV2 <= 1 && monotonic && i != e;
1765 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1768 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1770 if (SrcElt >= V2EltIdx0) {
1772 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1773 } else if (CurrElt != SrcElt) {
1780 if (EltsFromV2 == 1 && monotonic) {
1781 // Compute mask and shuffle
1782 MachineFunction &MF = DAG.getMachineFunction();
1783 SSARegMap *RegMap = MF.getSSARegMap();
1784 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1785 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1786 // Initialize temporary register to 0
1787 SDOperand InitTempReg =
1788 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1789 // Copy register's contents as index in INSERT_MASK:
1790 SDOperand ShufMaskOp =
1791 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1792 DAG.getTargetConstant(V2Elt, MVT::i32),
1793 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1794 // Use shuffle mask in SHUFB synthetic instruction:
1795 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1797 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1798 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1800 SmallVector<SDOperand, 16> ResultMask;
1801 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1803 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1806 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1808 for (unsigned j = 0; j != BytesPerElement; ++j) {
1809 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1814 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1815 &ResultMask[0], ResultMask.size());
1816 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
//! Lower ISD::SCALAR_TO_VECTOR for the Cell SPU target.
/*!
  \arg Op  The SCALAR_TO_VECTOR node; operand 0 is the scalar.
  \arg DAG The SelectionDAG being constructed.
  \return  For a constant scalar, a BUILD_VECTOR replicating it into every
           lane; otherwise an SPUISD::PROMOTE_SCALAR of the scalar.
  NOTE(review): declarations of n_copies/VT and the non-default case labels
  of the second switch are elided in this listing.
*/
1820 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1821 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1823 if (Op0.Val->getOpcode() == ISD::Constant) {
1824 // For a constant, build the appropriate constant vector, which will
1825 // eventually simplify to a vector register load.
1827 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1828 SmallVector<SDOperand, 16> ConstVecValues;
1832 // Create a constant vector:
1833 switch (Op.getValueType()) {
1834 default: assert(0 && "Unexpected constant value type in "
1835 "LowerSCALAR_TO_VECTOR");
1836 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1837 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1838 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1839 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1840 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1841 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1844 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1845 for (size_t j = 0; j < n_copies; ++j)
1846 ConstVecValues.push_back(CValue);
1848 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1849 &ConstVecValues[0], ConstVecValues.size());
1851 // Otherwise, copy the value from one register to another:
1852 switch (Op0.getValueType()) {
1853 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1860 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1867 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1868 switch (Op.getValueType()) {
1870 SDOperand rA = Op.getOperand(0);
1871 SDOperand rB = Op.getOperand(1);
1872 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1873 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1874 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1875 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1877 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1881 // Multiply two v8i16 vectors (pipeline friendly version):
1882 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1883 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1884 // c) Use SELB to select upper and lower halves from the intermediate results
1886 // NOTE: We really want to move the FSMBI to earlier to actually get the
1887 // dual-issue. This code does manage to do this, even if it's a little on
1890 MachineFunction &MF = DAG.getMachineFunction();
1891 SSARegMap *RegMap = MF.getSSARegMap();
1892 SDOperand Chain = Op.getOperand(0);
1893 SDOperand rA = Op.getOperand(0);
1894 SDOperand rB = Op.getOperand(1);
1895 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1896 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1899 DAG.getCopyToReg(Chain, FSMBIreg,
1900 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1901 DAG.getConstant(0xcccc, MVT::i32)));
1904 DAG.getCopyToReg(FSMBOp, HiProdReg,
1905 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1907 SDOperand HHProd_v4i32 =
1908 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1909 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1911 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1912 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1913 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1914 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1916 DAG.getConstant(16, MVT::i16))),
1917 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1920 // This M00sE is N@stI! (apologies to Monty Python)
1922 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1923 // is to break it all apart, sign extend, and reassemble the various
1924 // intermediate products.
1926 MachineFunction &MF = DAG.getMachineFunction();
1927 SSARegMap *RegMap = MF.getSSARegMap();
1928 SDOperand Chain = Op.getOperand(0);
1929 SDOperand rA = Op.getOperand(0);
1930 SDOperand rB = Op.getOperand(1);
1931 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1932 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1934 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1935 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1936 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1939 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1940 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1941 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1943 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1945 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1948 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1949 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1951 SDOperand FSMBdef_2222 =
1952 DAG.getCopyToReg(Chain, FSMBreg_2222,
1953 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1954 DAG.getConstant(0x2222, MVT::i32)));
1956 SDOperand FSMBuse_2222 =
1957 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1959 SDOperand LoProd_1 =
1960 DAG.getCopyToReg(Chain, LoProd_reg,
1961 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1964 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1967 DAG.getNode(ISD::AND, MVT::v4i32,
1968 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1969 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1970 LoProdMask, LoProdMask,
1971 LoProdMask, LoProdMask));
1974 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1975 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1978 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1979 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1982 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1986 SDOperand HHProd_1 =
1987 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1989 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1990 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1994 DAG.getCopyToReg(Chain, HiProd_reg,
1995 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1997 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2001 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2002 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2004 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2005 DAG.getNode(ISD::OR, MVT::v4i32,
2010 cerr << "CellSPU: Unknown vector multiplication, got "
2011 << MVT::getValueTypeString(Op.getValueType())
// Lower single-precision FDIV (the caller also routes v4f32 here).
// Strategy visible in the code below: compute an approximate reciprocal of B
// via SPUISD::FPRecipEst refined by SPUISD::FPInterp ("fi B, frest(B)"),
// multiply by A, then add one correction term A*(1/B)*(A - B*(A*(1/B)))
// NOTE(review): approximately one Newton-Raphson step — the exact numerics
// depend on the FPInterp/FPRecipEst node semantics; confirm against the SPU ISA.
// NOTE(review): this listing is missing the `} else {` between the f32 and
// vector register-class branches and the `SDOperand BRcpl =` /
// `SDOperand AxBRcpl =` declaration lines; structure inferred, do not edit
// blindly.
2020 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2021 MachineFunction &MF = DAG.getMachineFunction();
2022 SSARegMap *RegMap = MF.getSSARegMap();
2024 SDOperand A = Op.getOperand(0);
2025 SDOperand B = Op.getOperand(1);
2026 unsigned VT = Op.getValueType();
2028 unsigned VRegBR, VRegC;
// Scalar f32 uses the 32-bit FP register class; the (elided) else branch
// uses the 128-bit vector register class for v4f32.
2030 if (VT == MVT::f32) {
2031 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2032 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2034 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2035 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2037 // TODO: make sure we're feeding FPInterp the right arguments
2038 // Right now: fi B, frest(B)
2041 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl holds the refined reciprocal estimate of B, pinned in VRegBR so it
// can be re-read below without recomputation.
2043 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2044 DAG.getNode(SPUISD::FPInterp, VT, B,
2045 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2047 // Computes A * BRcpl and stores in a temporary register
2049 DAG.getCopyToReg(BRcpl, VRegC,
2050 DAG.getNode(ISD::FMUL, VT, A,
2051 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2052 // What's the Chain variable do? It's magic!
2053 // TODO: set Chain = Op(0).getEntryNode()
// Final result: (A*BRcpl) + BRcpl*(A - B*(A*BRcpl)) — the correction term
// compensates for the error of the reciprocal estimate.
2055 return DAG.getNode(ISD::FADD, VT,
2056 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2057 DAG.getNode(ISD::FMUL, VT,
2058 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2059 DAG.getNode(ISD::FSUB, VT, A,
2060 DAG.getNode(ISD::FMUL, VT, B,
2061 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2064 // Expands double-precision FDIV
2065 // Expects two doubles as inputs X and Y, does a floating point
2066 // reciprocal estimate, and three iterations of Newton-Raphson
2067 // to increase accuracy.
2068 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2069 // MachineFunction &MF = DAG.getMachineFunction();
2070 // SSARegMap *RegMap = MF.getSSARegMap();
2072 // SDOperand X = Op.getOperand(0);
2073 // SDOperand Y = Op.getOperand(1);
// Lower EXTRACT_VECTOR_ELT for constant indices: either take element 0
// directly from the preferred slot (i32/i64), or build a 16-byte SHUFB mask
// that rotates the requested element's bytes into the preferred slot and
// then extract slot 0.
// NOTE(review): the index MUST be a ConstantSDNode (asserted below);
// variable-index extraction is not handled here.
2076 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2077 unsigned VT = Op.getValueType();
2078 SDOperand N = Op.getOperand(0);
2079 SDOperand Elt = Op.getOperand(1);
2080 SDOperand ShufMask[16];
2081 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2083 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2085 int EltNo = (int) C->getValue();
// Range-check the element number per element type (16/8/4/2 lanes in a
// 128-bit register).
// NOTE(review): the i32/i64 assert MESSAGES are off by one — the checks
// reject slot > 3 and slot > 1 respectively, not "> 4"/"> 2".
2088 if (VT == MVT::i8 && EltNo >= 16)
2089 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2090 else if (VT == MVT::i16 && EltNo >= 8)
2091 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2092 else if (VT == MVT::i32 && EltNo >= 4)
2093 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2094 else if (VT == MVT::i64 && EltNo >= 2)
2095 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2097 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2098 // i32 and i64: Element 0 is the preferred slot
2099 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2102 // Need to generate shuffle mask and extract:
2103 int prefslot_begin, prefslot_end;
2104 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte ranges by type (the `switch (VT)` / `case` lines are
// missing from this listing): i8 -> byte 3, i16 -> bytes 2-3,
// i32 -> bytes 0-3, i64 -> bytes 0-7.
2108 prefslot_begin = prefslot_end = 3;
2112 prefslot_begin = 2; prefslot_end = 3;
2116 prefslot_begin = 0; prefslot_end = 3;
2120 prefslot_begin = 0; prefslot_end = 7;
2125 for (int i = 0; i < 16; ++i) {
2126 // zero fill upper part of preferred slot, don't care about the
2128 unsigned int mask_val;
2130 if (i <= prefslot_end) {
2132 ((i < prefslot_begin)
2134 : elt_byte + (i - prefslot_begin));
2136 ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
// Bytes past the preferred slot just repeat the pattern (don't-care lanes).
2138 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2141 SDOperand ShufMaskVec =
2142 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2144 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle the wanted element into the preferred slot, then extract it.
2146 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2147 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2148 N, N, ShufMaskVec));
// Lower INSERT_VECTOR_ELT (constant index only) by scalar-to-vector
// promoting the new value and SHUFB-merging it into the existing vector
// using an SPUISD::INSERT_MASK keyed off the element's byte offset.
2152 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2153 SDOperand VecOp = Op.getOperand(0);
2154 SDOperand ValOp = Op.getOperand(1);
2155 SDOperand IdxOp = Op.getOperand(2);
2156 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> asserts internally and never returns null, so the
// assert below is dead; use dyn_cast<> if a real runtime check is intended.
2158 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2159 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2161 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2162 // Use $2 because it's always 16-byte aligned and it's available:
2163 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// Build SHUFB(scalar_to_vector(ValOp), VecOp, INSERT_MASK(R2 + idx)).
// NOTE(review): intermediate lines (the `SDOperand result =`, second SHUFB
// operand, and scaling of the index) are missing from this listing.
2166 DAG.getNode(SPUISD::SHUFB, VT,
2167 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2169 DAG.getNode(SPUISD::INSERT_MASK, VT,
2170 DAG.getNode(ISD::ADD, PtrVT,
2172 DAG.getConstant(CN->getValue(),
// Lower i8 arithmetic the SPU cannot do natively: promote both operands to
// i16 (sign- or zero-extended per operation), perform the op at i16, and
// truncate the result back to i8.
// NOTE(review): the `switch (Opc)` header and all `case` labels are missing
// from this listing; each promoted-operand cluster below is one case arm.
2178 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2179 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2181 assert(Op.getValueType() == MVT::i8);
2184 assert(0 && "Unhandled i8 math operator");
2188 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Subtraction arm: sign-extend both operands (constants are re-materialized
// at i16 instead of wrapping them in an extend node).
2190 SDOperand N1 = Op.getOperand(1);
2191 N0 = (N0.getOpcode() != ISD::Constant
2192 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2193 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2194 N1 = (N1.getOpcode() != ISD::Constant
2195 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2196 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2197 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2198 DAG.getNode(Opc, MVT::i16, N0, N1));
// Rotate-style arm: replicate the low byte into both halves of the i16
// (N0 | (N0 << 8)) so the i16 rotation behaves like an i8 rotation.
2202 SDOperand N1 = Op.getOperand(1);
2204 N0 = (N0.getOpcode() != ISD::Constant
2205 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2206 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
// The shift amount may already be wider than i16; truncate in that case.
2207 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2208 N1 = (N1.getOpcode() != ISD::Constant
2209 ? DAG.getNode(N1Opc, MVT::i16, N1)
2210 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2211 SDOperand ExpandArg =
2212 DAG.getNode(ISD::OR, MVT::i16, N0,
2213 DAG.getNode(ISD::SHL, MVT::i16,
2214 N0, DAG.getConstant(8, MVT::i16)));
2215 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2216 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical-shift arm: zero-extend the value so high bits shifted in are 0.
2220 SDOperand N1 = Op.getOperand(1);
2222 N0 = (N0.getOpcode() != ISD::Constant
2223 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2224 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2225 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2226 N1 = (N1.getOpcode() != ISD::Constant
2227 ? DAG.getNode(N1Opc, MVT::i16, N1)
2228 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2229 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2230 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic-shift arm: sign-extend so the sign bit is replicated.
2233 SDOperand N1 = Op.getOperand(1);
2235 N0 = (N0.getOpcode() != ISD::Constant
2236 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2237 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2238 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2239 N1 = (N1.getOpcode() != ISD::Constant
2240 ? DAG.getNode(N1Opc, MVT::i16, N1)
2241 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2242 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2243 DAG.getNode(Opc, MVT::i16, N0, N1));
// Multiply arm: sign-extend both operands, multiply at i16, truncate —
// the low 8 bits of the product are the same either way.
2246 SDOperand N1 = Op.getOperand(1);
2248 N0 = (N0.getOpcode() != ISD::Constant
2249 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2250 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2251 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2252 N1 = (N1.getOpcode() != ISD::Constant
2253 ? DAG.getNode(N1Opc, MVT::i16, N1)
2254 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2255 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2256 DAG.getNode(Opc, MVT::i16, N0, N1));
2264 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of a v16i8 logical op is a constant splat, rebuild that
// operand as a BUILD_VECTOR of i8 *target* constants so instruction
// selection can match the byte-immediate forms (ANDBI/ORBI/XORBI).
// NOTE(review): the `static SDOperand` return-type line and several `else`
// lines are missing from this listing.
2266 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2269 MVT::ValueType VT = Op.getValueType();
// Assume the constant vector is operand 0; if it isn't a BUILD_VECTOR
// (possibly behind a BIT_CONVERT), swap and try operand 1.
2271 ConstVec = Op.getOperand(0);
2272 Arg = Op.getOperand(1);
2273 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2274 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2275 ConstVec = ConstVec.getOperand(0);
2277 ConstVec = Op.getOperand(1);
2278 Arg = Op.getOperand(0);
2279 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2280 ConstVec = ConstVec.getOperand(0);
2285 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2286 uint64_t VectorBits[2];
2287 uint64_t UndefBits[2];
2288 uint64_t SplatBits, SplatUndef;
// Only rewrite when the vector is a fully-known constant splat.
2291 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2292 && isConstantSplat(VectorBits, UndefBits,
2293 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2294 SplatBits, SplatUndef, SplatSize)) {
2295 SDOperand tcVec[16];
2296 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2297 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2299 // Turn the BUILD_VECTOR into a set of target constants:
2300 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same logical opcode with the splat byte as target constants.
2303 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2304 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2311 //! Lower i32 multiplication
// SPU has no full 32x32 multiply; compose it from partial products:
// (mpyh(a,b) + mpyh(b,a)) + mpyu(a,b) — the two high-halfword cross
// products plus the unsigned low-halfword product.
// NOTE(review): the signature's second line (`unsigned Opc)`) and the
// `switch (VT)` / default-case lines are missing from this listing.
2312 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2316 cerr << "CellSPU: Unknown LowerMUL value type, got "
2317 << MVT::getValueTypeString(Op.getValueType())
2323 SDOperand rA = Op.getOperand(0);
2324 SDOperand rB = Op.getOperand(1);
2326 return DAG.getNode(ISD::ADD, MVT::i32,
2327 DAG.getNode(ISD::ADD, MVT::i32,
2328 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2329 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2330 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2337 //! Custom lowering for CTPOP (count population)
2339 Custom lowering code that counts the number ones in the input
2340 operand. SPU has such an instruction, but it counts the number of
2341 ones per byte, which then have to be accumulated.
// Promote the scalar into a vector, run SPUISD::CNTB (per-byte popcount),
// then sum the per-byte counts with type-width-appropriate shift/add
// reductions. The `case MVT::i8 / i16 / i32` labels are missing from this
// listing; the three sections below are those case arms in order.
2343 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2344 unsigned VT = Op.getValueType();
// Vector type with the same total width (128 bits) as a SPU register.
2345 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)))
2349 SDOperand N = Op.getOperand(0);
2350 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
// i8: one CNTB byte already holds the full answer.
2352 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2353 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2355 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: add the two byte-counts (value >> 8) + value, mask to 0x0f.
2359 MachineFunction &MF = DAG.getMachineFunction();
2360 SSARegMap *RegMap = MF.getSSARegMap();
2362 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2364 SDOperand N = Op.getOperand(0);
2365 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2366 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2367 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2369 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2370 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2372 // CNTB_result becomes the chain to which all of the virtual registers
2373 // CNTB_reg, SUM1_reg become associated:
2374 SDOperand CNTB_result =
2375 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
// Pin the CNTB result in a register so it can be read twice (shifted and
// unshifted) without duplicating the computation.
2377 SDOperand CNTB_rescopy =
2378 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2380 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2382 return DAG.getNode(ISD::AND, MVT::i16,
2383 DAG.getNode(ISD::ADD, MVT::i16,
2384 DAG.getNode(ISD::SRL, MVT::i16,
// i32: two-level reduction — add (x>>16)+x, then (sum>>8)+sum, mask 0xff.
2391 MachineFunction &MF = DAG.getMachineFunction();
2392 SSARegMap *RegMap = MF.getSSARegMap();
2394 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2395 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2397 SDOperand N = Op.getOperand(0);
2398 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2399 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2400 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2401 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2403 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2404 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2406 // CNTB_result becomes the chain to which all of the virtual registers
2407 // CNTB_reg, SUM1_reg become associated:
2408 SDOperand CNTB_result =
2409 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2411 SDOperand CNTB_rescopy =
2412 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2415 DAG.getNode(ISD::SRL, MVT::i32,
2416 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2419 DAG.getNode(ISD::ADD, MVT::i32,
2420 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2422 SDOperand Sum1_rescopy =
2423 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2426 DAG.getNode(ISD::SRL, MVT::i32,
2427 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2430 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2431 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2433 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2443 /// LowerOperation - Provide custom lowering hooks for some operations.
// Central dispatch for every ISD opcode this target marked Custom; each
// case forwards to the matching Lower* helper defined earlier in the file.
// NOTE(review): many `case` labels (LOAD, STORE, Constant, CALL, RET, the
// i8 math ops, AND/OR/XOR, MUL, FDIV, CTPOP) are missing from this listing;
// the return statements below belong to those elided labels.
2446 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2448 switch (Op.getOpcode()) {
// Default: report the unhandled opcode before failing.
2450 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2451 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2452 cerr << "*Op.Val:\n";
2459 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2461 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2462 case ISD::ConstantPool:
2463 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2464 case ISD::GlobalAddress:
2465 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2466 case ISD::JumpTable:
2467 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2469 return LowerConstant(Op, DAG);
2470 case ISD::ConstantFP:
2471 return LowerConstantFP(Op, DAG);
2472 case ISD::FORMAL_ARGUMENTS:
2473 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2475 return LowerCALL(Op, DAG);
2477 return LowerRET(Op, DAG, getTargetMachine());
// i8 operations share one helper parameterized by opcode.
2486 return LowerI8Math(Op, DAG, Op.getOpcode());
2488 // Vector-related lowering.
2489 case ISD::BUILD_VECTOR:
2490 return LowerBUILD_VECTOR(Op, DAG);
2491 case ISD::SCALAR_TO_VECTOR:
2492 return LowerSCALAR_TO_VECTOR(Op, DAG);
2493 case ISD::VECTOR_SHUFFLE:
2494 return LowerVECTOR_SHUFFLE(Op, DAG);
2495 case ISD::EXTRACT_VECTOR_ELT:
2496 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2497 case ISD::INSERT_VECTOR_ELT:
2498 return LowerINSERT_VECTOR_ELT(Op, DAG);
2500 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2504 return LowerByteImmed(Op, DAG);
2506 // Vector and i8 multiply:
2508 if (MVT::isVector(Op.getValueType()))
2509 return LowerVectorMUL(Op, DAG);
2510 else if (Op.getValueType() == MVT::i8)
2511 return LowerI8Math(Op, DAG, Op.getOpcode());
2513 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only f32/v4f32 are handled; f64 support is stubbed out.
2516 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2517 return LowerFDIVf32(Op, DAG);
2518 // else if (Op.getValueType() == MVT::f64)
2519 // return LowerFDIVf64(Op, DAG);
2521 assert(0 && "Calling FDIV on unsupported MVT");
2524 return LowerCTPOP(Op, DAG);
2530 //===----------------------------------------------------------------------===//
2531 // Other Lowering Code
2532 //===----------------------------------------------------------------------===//
// InsertAtEndOfBasicBlock - target hook for instructions marked
// usesCustomDAGSchedInserter.
// NOTE(review): the return-type line and the body are missing from this
// listing; cannot document the behavior from here.
2535 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2536 MachineBasicBlock *BB)
2541 //===----------------------------------------------------------------------===//
2542 // Target Optimization Hooks
2543 //===----------------------------------------------------------------------===//
// PerformDAGCombine - target-specific DAG combines. The only visible combine
// folds trivial shifts/rotates: 0 << V -> 0 and V << 0 -> V for the listed
// SPU shift/rotate node kinds.
// NOTE(review): the return-type line, opening brace, and the returns inside
// the innermost ifs are missing from this listing.
2546 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2549 TargetMachine &TM = getTargetMachine();
2550 SelectionDAG &DAG = DCI.DAG;
2552 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2554 switch (N->getOpcode()) {
2557 // Look for obvious optimizations for shift left:
2558 // a) Replace 0 << V with 0
2559 // b) Replace V << 0 with V
2561 // N.B: llvm will generate an undef node if the shift amount is greater than
2562 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2565 case SPU::SHLQBIIvec:
2567 case SPU::ROTHIr16_i32:
2569 case SPU::ROTIr32_i16:
2570 case SPU::ROTQBYIvec:
2571 case SPU::ROTQBYBIvec:
2572 case SPU::ROTQBIIvec:
2573 case SPU::ROTHMIr16:
2575 case SPU::ROTQMBYIvec: {
// cast<> here is safe: the opcode was just checked to be ISD::Constant.
2576 if (N0.getOpcode() == ISD::Constant) {
2577 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2578 if (C->getValue() == 0) // 0 << V -> 0.
2582 SDOperand N1 = N->getOperand(1);
2583 if (N1.getOpcode() == ISD::Constant) {
2584 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2585 if (C->getValue() == 0) // V << 0 -> V
2596 //===----------------------------------------------------------------------===//
2597 // Inline Assembly Support
2598 //===----------------------------------------------------------------------===//
2600 /// getConstraintType - Given a constraint letter, return the type of
2601 /// constraint it is for this target.
// Single-letter register-class constraints return C_RegisterClass; anything
// else defers to the base TargetLowering implementation.
// NOTE(review): the `case` letter labels (the constraint characters) are
// missing from this listing.
2602 SPUTargetLowering::ConstraintType
2603 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2604 if (ConstraintLetter.size() == 1) {
2605 switch (ConstraintLetter[0]) {
2612 return C_RegisterClass;
2615 return TargetLowering::getConstraintType(ConstraintLetter);
// Map single-letter inline-asm constraints to SPU register classes, picking
// a class by the requested value type (e.g. f32 vs f64 for FP constraints);
// unmatched constraints fall through to the base class.
// NOTE(review): the `case` labels and the VT checks preceding several
// returns are missing from this listing. The "RS6000" comment below looks
// copied from the PowerPC backend — probably should say SPU.
2618 std::pair<unsigned, const TargetRegisterClass*>
2619 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2620 MVT::ValueType VT) const
2622 if (Constraint.size() == 1) {
2623 // GCC RS6000 Constraint Letters
2624 switch (Constraint[0]) {
2628 return std::make_pair(0U, SPU::R64CRegisterClass);
2629 return std::make_pair(0U, SPU::R32CRegisterClass);
2632 return std::make_pair(0U, SPU::R32FPRegisterClass);
2633 else if (VT == MVT::f64)
2634 return std::make_pair(0U, SPU::R64FPRegisterClass);
2637 return std::make_pair(0U, SPU::GPRCRegisterClass);
2641 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// computeMaskedBitsForTargetNode - report known-zero/known-one bits for
// target-specific nodes.
// NOTE(review): the return-type line, the Mask parameter line, the KnownOne
// parameter line, and the body are missing from this listing; behavior
// cannot be documented from here.
2645 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2647 uint64_t &KnownZero,
2649 const SelectionDAG &DAG,
2650 unsigned Depth ) const {
2655 // LowerAsmOperandForConstraint
// Lower an inline-asm operand for the given constraint letter. No SPU
// constraint needs special handling yet, so this simply delegates to the
// default TargetLowering implementation.
2657 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2658 char ConstraintLetter,
2659 std::vector<SDOperand> &Ops,
2660 SelectionDAG &DAG) {
2661 // Default, for the time being, to the base class handler
2662 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2665 /// isLegalAddressImmediate - Return true if the integer value can be used
2666 /// as the offset of the target addressing mode.
2667 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2668 // SPU's addresses are 256K:
2669 return (V > -(1 << 18) && V < (1 << 18) - 1);
2672 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {