//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by a team from the Computer Systems Research
// Department at The Aerospace Corporation and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
15 #include "SPURegisterNames.h"
16 #include "SPUISelLowering.h"
17 #include "SPUTargetMachine.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SSARegMap.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
37 // Used in getTargetNodeName() below
39 std::map<unsigned, const char *> node_names;
41 //! MVT::ValueType mapping to useful data for Cell SPU
struct valtype_map_s {
  const MVT::ValueType valtype;
  const int prefslot_byte;
};

// Preferred-slot byte offsets for each scalar type. (Per the SPU ABI the
// preferred slot is the left word of a register, so bytes sit at offset 3,
// halfwords at offset 2, and words/doublewords at offset 0.)
const valtype_map_s valtype_map[] = {
  { MVT::i1,   3 },
  { MVT::i8,   3 },
  { MVT::i16,  2 },
  { MVT::i32,  0 },
  { MVT::f32,  0 },
  { MVT::i64,  0 },
  { MVT::f64,  0 },
  { MVT::i128, 0 }
};

const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
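// Look up the valtype_map entry for VT. The load/store lowering code below
// uses the prefslot_byte field to compute how far a loaded value has to be
// rotated so that it lands in the register's preferred slot.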
const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  const valtype_map_s *retval = 0;

  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;
      break;
    }
  }

  if (retval == 0) {
    cerr << "getValueTypeMapEntry returns NULL for "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
  }

  return retval;
}
82 //! Predicate that returns true if operand is a memory target
/*!
  \arg Op Operand to test
  \return true if the operand is a memory target (i.e., global
  address, external symbol, constant pool) or an existing D-Form
  address
 */
bool isMemoryOperand(const SDOperand &Op)
{
91 const unsigned Opc = Op.getOpcode();
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::FrameIndex
95 || Opc == ISD::JumpTable
96 || Opc == ISD::ConstantPool
97 || Opc == ISD::ExternalSymbol
98 || Opc == ISD::TargetGlobalAddress
99 || Opc == ISD::TargetGlobalTLSAddress
100 || Opc == ISD::TargetFrameIndex
101 || Opc == ISD::TargetJumpTable
102 || Opc == ISD::TargetConstantPool
103 || Opc == ISD::TargetExternalSymbol
          || Opc == SPUISD::DFormAddr);
}
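// isMemoryOperand() is used by the load/store lowering below to decide when a
// base pointer can be folded directly into a D-form (register + small
// immediate offset) address instead of being recomputed.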
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
112 // Fold away setcc operations if possible.
115 // Use _setjmp/_longjmp instead of setjmp/longjmp.
116 setUseUnderscoreSetJmp(true);
117 setUseUnderscoreLongJmp(true);
119 // Set up the SPU's register classes:
120 // NOTE: i8 register class is not registered because we cannot determine when
121 // we need to zero or sign extend for custom-lowered loads and stores.
122 // NOTE: Ignore the previous note. For now. :-)
123 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
124 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
125 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
126 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
127 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
128 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
129 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
131 // SPU has no sign or zero extended loads for i1, i8, i16:
132 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
133 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
134 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
135 setStoreXAction(MVT::i1, Custom);
137 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
140 setStoreXAction(MVT::i8, Custom);
142 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
144 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
146 // SPU constant load actions are custom lowered:
147 setOperationAction(ISD::Constant, MVT::i64, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
149 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
151 // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }
158 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // SPUDAGToDAGISel.
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);
163 // Expand the jumptable branches
164 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
165 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
166 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
168 // SPU has no intrinsics for these particular operations:
169 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
170 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
171 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions
174 setOperationAction(ISD::SREM, MVT::i32, Expand);
175 setOperationAction(ISD::UREM, MVT::i32, Expand);
176 setOperationAction(ISD::SREM, MVT::i64, Expand);
177 setOperationAction(ISD::UREM, MVT::i64, Expand);
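  // (The SPU has no integer divide or remainder hardware, so the remainder
  // operations above are expanded by the legalizer rather than selected
  // directly.)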
179 // We don't support sin/cos/sqrt/fmod
180 setOperationAction(ISD::FSIN , MVT::f64, Expand);
181 setOperationAction(ISD::FCOS , MVT::f64, Expand);
182 setOperationAction(ISD::FREM , MVT::f64, Expand);
183 setOperationAction(ISD::FSIN , MVT::f32, Expand);
184 setOperationAction(ISD::FCOS , MVT::f32, Expand);
185 setOperationAction(ISD::FREM , MVT::f32, Expand);
  // SPU has no hardware FP square root, so expand FSQRT:
188 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
189 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
191 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
192 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
194 // SPU can do rotate right and left, so legalize it... but customize for i8
195 // because instructions don't exist.
196 setOperationAction(ISD::ROTR, MVT::i32, Legal);
197 setOperationAction(ISD::ROTR, MVT::i16, Legal);
198 setOperationAction(ISD::ROTR, MVT::i8, Custom);
199 setOperationAction(ISD::ROTL, MVT::i32, Legal);
200 setOperationAction(ISD::ROTL, MVT::i16, Legal);
201 setOperationAction(ISD::ROTL, MVT::i8, Custom);
202 // SPU has no native version of shift left/right for i8
203 setOperationAction(ISD::SHL, MVT::i8, Custom);
204 setOperationAction(ISD::SRL, MVT::i8, Custom);
205 setOperationAction(ISD::SRA, MVT::i8, Custom);
207 // Custom lower i32 multiplications
208 setOperationAction(ISD::MUL, MVT::i32, Custom);
210 // Need to custom handle (some) common i8 math ops
211 setOperationAction(ISD::SUB, MVT::i8, Custom);
212 setOperationAction(ISD::MUL, MVT::i8, Custom);
  // SPU does not have BSWAP, but it does support CTLZ for i32.
215 // CTPOP has to be custom lowered.
216 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
217 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
219 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
222 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
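  // CTPOP is custom lowered so it can be built on the SPU's CNTB instruction
  // (per-byte population count), exposed as the SPUISD::CNTB node below.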
224 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
225 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
227 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
229 // SPU does not have select or setcc
230 setOperationAction(ISD::SELECT, MVT::i1, Expand);
231 setOperationAction(ISD::SELECT, MVT::i8, Expand);
232 setOperationAction(ISD::SELECT, MVT::i16, Expand);
233 setOperationAction(ISD::SELECT, MVT::i32, Expand);
234 setOperationAction(ISD::SELECT, MVT::i64, Expand);
235 setOperationAction(ISD::SELECT, MVT::f32, Expand);
236 setOperationAction(ISD::SELECT, MVT::f64, Expand);
238 setOperationAction(ISD::SETCC, MVT::i1, Expand);
239 setOperationAction(ISD::SETCC, MVT::i8, Expand);
240 setOperationAction(ISD::SETCC, MVT::i16, Expand);
241 setOperationAction(ISD::SETCC, MVT::i32, Expand);
242 setOperationAction(ISD::SETCC, MVT::i64, Expand);
243 setOperationAction(ISD::SETCC, MVT::f32, Expand);
244 setOperationAction(ISD::SETCC, MVT::f64, Expand);
246 // SPU has a legal FP -> signed INT instruction
247 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
248 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
250 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
252 // FDIV on SPU requires custom lowering
253 setOperationAction(ISD::FDIV, MVT::f32, Custom);
254 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
256 // SPU has [U|S]INT_TO_FP
257 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
259 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
262 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
263 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
264 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
266 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
267 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
268 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
269 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
271 // We cannot sextinreg(i1). Expand to shifts.
272 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
274 // Support label based line numbers.
275 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
276 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
278 // We want to legalize GlobalAddress and ConstantPool nodes into the
279 // appropriate instructions to materialize the address.
280 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
282 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
283 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
284 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
286 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
287 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
289 // RET must be custom lowered, to meet ABI requirements
290 setOperationAction(ISD::RET, MVT::Other, Custom);
292 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
293 setOperationAction(ISD::VASTART , MVT::Other, Custom);
295 // Use the default implementation.
296 setOperationAction(ISD::VAARG , MVT::Other, Expand);
297 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
298 setOperationAction(ISD::VAEND , MVT::Other, Expand);
299 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
300 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
302 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
304 // Cell SPU has instructions for converting between i64 and fp.
305 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
306 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
308 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
309 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
311 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
312 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // Set up the vector types and their operation actions. Most of the simple
  // integer and FP vector operations are legal on the 128-bit registers; the
  // remainder are custom lowered or expanded below.
316 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
321 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
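  // Note: all of the vector types above share the single 128-bit VECREG
  // register class; the element type only affects which operations are legal.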
323 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
324 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
325 // add/sub are legal for all supported vector VT's.
326 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
327 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
328 // mul has to be custom lowered.
329 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
331 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
336 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
338 // These operations need to be expanded:
339 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
343 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
345 // Custom lower build_vector, constant pool spills, insert and
346 // extract vector elements:
347 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
  }
355 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
356 setOperationAction(ISD::AND, MVT::v16i8, Custom);
357 setOperationAction(ISD::OR, MVT::v16i8, Custom);
358 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
359 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
361 setSetCCResultType(MVT::i32);
362 setShiftAmountType(MVT::i32);
363 setSetCCResultContents(ZeroOrOneSetCCResult);
365 setStackPointerRegisterToSaveRestore(SPU::R1);
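  // (R1 is the stack pointer register in the SPU ABI.)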
367 // We have target-specific dag combine patterns for the following nodes:
368 // e.g., setTargetDAGCombine(ISD::SUB);
  computeRegisterProperties();
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
376 if (node_names.empty()) {
377 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
378 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
379 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
380 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
381 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
382 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
383 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
384 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
385 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
386 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
387 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
388 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
390 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
394 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
395 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
396 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
397 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
398 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
399 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
400 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
401 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
402 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
403 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
405 "SPUISD::ROTBYTES_RIGHT_Z";
406 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
407 "SPUISD::ROTBYTES_RIGHT_S";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
409 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
410 "SPUISD::ROTBYTES_LEFT_CHAINED";
411 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
412 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
413 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
414 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
415 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
416 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
  return ((i != node_names.end()) ? i->second : 0);
}
424 //===----------------------------------------------------------------------===//
425 // Calling convention code:
426 //===----------------------------------------------------------------------===//
428 #include "SPUGenCallingConv.inc"
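// SPUGenCallingConv.inc is generated by TableGen from the target's calling
// convention description; it supplies the CCAssignFn routines (e.g. RetCC_SPU
// used by LowerRET below).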
430 //===----------------------------------------------------------------------===//
431 // LowerOperation implementation
432 //===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
 */
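// For example, an i32 load from an address whose low nibble is 8 becomes a
// 16-byte load of the containing quadword followed by a byte rotate of
// roughly (8 - prefslot_byte), so the word ends up in the preferred slot.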
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
441 LoadSDNode *LN = cast<LoadSDNode>(Op);
442 SDOperand basep = LN->getBasePtr();
443 SDOperand the_chain = LN->getChain();
444 MVT::ValueType VT = LN->getLoadedVT();
445 MVT::ValueType OpVT = Op.Val->getValueType(0);
446 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
447 ISD::LoadExtType ExtType = LN->getExtensionType();
448 unsigned alignment = LN->getAlignment();
449 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
452 // For an extending load of an i1 variable, just call it i8 (or whatever we
453 // were passed) and make it zero-extended:
  if (VT == MVT::i1) {
    VT = OpVT;
    ExtType = ISD::ZEXTLOAD;
  }
459 switch (LN->getAddressingMode()) {
460 case ISD::UNINDEXED: {
    SDOperand result;
    SDOperand rot_op, rotamt;
    SDOperand ptrp;
    int c_offset;
    int c_rotamt;
467 // The vector type we really want to be when we load the 16-byte chunk
468 MVT::ValueType vecVT, opVecVT;
471 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
475 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
477 if (basep.getOpcode() == ISD::ADD) {
478 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerLOAD: ISD::ADD operand 1 is not constant");
483 c_offset = (int) CN->getValue();
484 c_rotamt = (int) (c_offset & 0xf);
486 // Adjust the rotation amount to ensure that the final result ends up in
487 // the preferred slot:
488 c_rotamt -= vtm->prefslot_byte;
489 ptrp = basep.getOperand(0);
    } else {
      c_offset = 0;
      c_rotamt = -vtm->prefslot_byte;
      ptrp = basep;
    }
496 if (alignment == 16) {
497 // 16-byte aligned load into preferred slot, no rotation
499 if (isMemoryOperand(ptrp))
503 // Return modified D-Form address for pointer:
504 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
505 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
507 return DAG.getLoad(VT, LN->getChain(), ptrp,
508 LN->getSrcValue(), LN->getSrcValueOffset(),
509 LN->isVolatile(), 16);
511 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
512 LN->getSrcValueOffset(), OpVT,
513 LN->isVolatile(), 16);
519 // Realign the base pointer, with a D-Form address
520 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
521 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
522 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
527 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
528 LN->getSrcValue(), LN->getSrcValueOffset(),
529 LN->isVolatile(), 16);
530 the_chain = rot_op.getValue(1);
531 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
533 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
538 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
539 the_chain = result.getValue(1);
541 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
546 scalarvts = DAG.getVTList(VT, MVT::Other);
548 scalarvts = DAG.getVTList(OpVT, MVT::Other);
551 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
555 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
556 the_chain = result.getValue(1);
558 // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
562 NewOpC = (OpVT == MVT::i1
563 ? SPUISD::EXTRACT_I1_SEXT
564 : SPUISD::EXTRACT_I8_SEXT);
565 } else if (ExtType == ISD::ZEXTLOAD) {
566 NewOpC = (OpVT == MVT::i1
567 ? SPUISD::EXTRACT_I1_ZEXT
568 : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
574 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
575 SDOperand retops[2] = { result, the_chain };
577 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
582 // Misaligned 16-byte load:
583 if (basep.getOpcode() == ISD::LOAD) {
584 LN = cast<LoadSDNode>(basep);
585 if (LN->getAlignment() == 16) {
586 // We can verify that we're really loading from a 16-byte aligned
587 // chunk. Encapsulate basep as a D-Form address and return a new
589 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
590 DAG.getConstant(0, PtrVT));
592 return DAG.getLoad(VT, LN->getChain(), basep,
593 LN->getSrcValue(), LN->getSrcValueOffset(),
594 LN->isVolatile(), 16);
596 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
597 LN->getSrcValue(), LN->getSrcValueOffset(),
598 OpVT, LN->isVolatile(), 16);
602 // Catch all other cases where we can't guarantee that we have a
603 // 16-byte aligned entity, which means resorting to an X-form
606 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
607 SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
608 SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);
610 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
612 SDOperand alignLoad =
613 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
614 LN->getSrcValue(), LN->getSrcValueOffset(),
615 LN->isVolatile(), 16);
617 SDOperand insertEltOp =
618 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
620 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
623 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
625 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
627 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
628 SDOperand retops[2] = { result, the_chain };
630 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
639 case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
650 /// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
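// In other words, a scalar store is a read-modify-write of the containing
// quadword: load the 16-byte block, build an insertion mask
// (SPUISD::INSERT_MASK, eventually a cwd/chd/cbd instruction), shuffle the new
// value into place with SHUFB, and store the whole block back.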
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
658 StoreSDNode *SN = cast<StoreSDNode>(Op);
659 SDOperand Value = SN->getValue();
660 MVT::ValueType VT = Value.getValueType();
661 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
662 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
663 SDOperand the_chain = SN->getChain();
664 //unsigned alignment = SN->getAlignment();
665 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
667 switch (SN->getAddressingMode()) {
668 case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();
    SDOperand ptrOp;
    SDOperand result;
    unsigned offset;
673 if (basep.getOpcode() == ISD::FrameIndex) {
674 // FrameIndex nodes are always properly aligned. Really.
678 if (basep.getOpcode() == ISD::ADD) {
679 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
      assert(CN != NULL
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
682 offset = unsigned(CN->getValue());
683 ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
                 << offset
                 << "\n");
    }
692 // The vector type we really want to load from the 16-byte chunk, except
693 // in the case of MVT::i1, which has to be v16i8.
694 unsigned vecVT, stVecVT;
    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    else
      stVecVT = MVT::v16i8;
700 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
702 // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
703 // the actual dform addr offs($reg).
704 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
705 DAG.getConstant((offset & ~0xf), PtrVT));
707 // Create the 16-byte aligned vector load
708 SDOperand alignLoad =
709 DAG.getLoad(vecVT, the_chain, basep,
710 SN->getSrcValue(), SN->getSrcValueOffset(),
711 SN->isVolatile(), 16);
712 the_chain = alignLoad.getValue(1);
714 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
715 SDOperand theValue = SN->getValue();
    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
720 || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }
726 SDOperand insertEltOp =
727 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
728 DAG.getNode(SPUISD::DFormAddr, PtrVT,
730 DAG.getConstant((offset & 0xf), PtrVT)));
732 result = DAG.getNode(SPUISD::SHUFB, vecVT,
733 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
735 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
737 result = DAG.getStore(the_chain, result, basep,
738 LN->getSrcValue(), LN->getSrcValueOffset(),
739 LN->isVolatile(), LN->getAlignment());
748 case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
759 /// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
762 MVT::ValueType PtrVT = Op.getValueType();
763 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
764 Constant *C = CP->getConstVal();
765 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
766 const TargetMachine &TM = DAG.getTarget();
767 SDOperand Zero = DAG.getConstant(0, PtrVT);
769 if (TM.getRelocationModel() == Reloc::Static) {
770 if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return CPI;
    } else {
774 // Generate hi/lo address pair
775 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
776 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}
static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
789 MVT::ValueType PtrVT = Op.getValueType();
790 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
791 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
792 SDOperand Zero = DAG.getConstant(0, PtrVT);
793 const TargetMachine &TM = DAG.getTarget();
795 if (TM.getRelocationModel() == Reloc::Static) {
796 if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      return JTI;
    } else {
800 // Generate hi/lo address pair
801 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
802 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}
static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
815 MVT::ValueType PtrVT = Op.getValueType();
816 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
817 GlobalValue *GV = GSDN->getGlobal();
818 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
819 SDOperand Zero = DAG.getConstant(0, PtrVT);
820 const TargetMachine &TM = DAG.getTarget();
822 if (TM.getRelocationModel() == Reloc::Static) {
823 if (!ST->usingLargeMem()) {
      // Generate a local store address
      return GA;
    } else {
827 // Generate hi/lo address pair
828 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
829 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
    }
  }

  cerr << "LowerGlobalAddress: Relocation model other than static not "
          "supported.\n";
  abort();
  /*NOTREACHED*/
}
843 //! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
850 unsigned VT = Op.getValueType();
851 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
853 if (VT == MVT::i64) {
854 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
855 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }

  cerr << "LowerConstant: unhandled constant type "
       << MVT::getValueTypeString(VT)
       << "\n";
  abort();
  /*NOTREACHED*/
  return SDOperand();
}
//! Custom lower single precision floating point constants
/*!
  "float" immediates can be lowered as if they were unsigned 32-bit integers.
  The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
  selection phase.
 */
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
877 unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
  assert(FP != NULL &&
         "LowerConstantFP: Node is not ConstantFPSDNode");
883 const APFloat &apf = FP->getValueAPF();
885 if (VT == MVT::f32) {
886 return DAG.getNode(SPUISD::SFPConstant, VT,
887 DAG.getTargetConstantFP(apf.convertToFloat(), VT));
888 } else if (VT == MVT::f64) {
889 uint64_t dbits = DoubleToBits(apf.convertToDouble());
890 return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}
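// Formal argument lowering: the first NumArgRegs arguments arrive in the
// registers reported by SPURegisterInfo::getArgRegs() (R3 and up in the SPU
// ABI); any remaining arguments are loaded from fixed stack slots starting at
// SPUFrameInfo::minStackSize().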
static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
900 MachineFunction &MF = DAG.getMachineFunction();
901 MachineFrameInfo *MFI = MF.getFrameInfo();
902 SSARegMap *RegMap = MF.getSSARegMap();
903 SmallVector<SDOperand, 8> ArgValues;
904 SDOperand Root = Op.getOperand(0);
905 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
907 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
908 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
910 unsigned ArgOffset = SPUFrameInfo::minStackSize();
911 unsigned ArgRegIdx = 0;
912 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
914 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
916 // Add DAG nodes to load the arguments or copy them out of registers.
917 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
920 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
921 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
931 if (!isVarArg && ArgRegIdx < NumArgRegs) {
932 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
933 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
941 if (!isVarArg && ArgRegIdx < NumArgRegs) {
942 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
943 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
951 if (!isVarArg && ArgRegIdx < NumArgRegs) {
952 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
953 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
961 if (!isVarArg && ArgRegIdx < NumArgRegs) {
962 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
963 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
971 if (!isVarArg && ArgRegIdx < NumArgRegs) {
972 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
973 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
981 if (!isVarArg && ArgRegIdx < NumArgRegs) {
982 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
983 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v2i64:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
995 if (!isVarArg && ArgRegIdx < NumArgRegs) {
996 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
997 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }
1006 // We need to load the argument to a virtual register if we determined above
1007 // that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }

      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }
1026 // If the function takes variable number of arguments, make a frame index for
1027 // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
1031 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1032 // If this function is vararg, store any remaining integer argument regs to
1033 // their spots on the stack so that they may be loaded by deferencing the
1034 // result of va_next.
1035 SmallVector<SDOperand, 8> MemOps;
1036 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1037 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1038 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1039 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1040 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1041 MemOps.push_back(Store);
1042 // Increment the address by four for the next argument to store
1043 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1044 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
  }
1050 ArgValues.push_back(Root);
1052 // Return the new list of results.
1053 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1054 Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}
1058 /// isLSAAddress - Return the immediate to use if the specified
1059 /// value is representable as a LSA address.
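/// (An LSA here is a word-aligned offset into the SPU's 256K local store that
/// is small enough, 18 bits after sign extension, to be encoded directly as an
/// immediate.)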
1060 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1061 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
1065 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1066 (Addr << 14 >> 14) != Addr)
1067 return 0; // Top 14 bits have to be sext of immediate.
  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
static SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1075 SDOperand Chain = Op.getOperand(0);
1077 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1078 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1080 SDOperand Callee = Op.getOperand(4);
1081 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1082 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1083 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1084 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1086 // Handy pointer type
1087 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1089 // Accumulate how many bytes are to be pushed on the stack, including the
1090 // linkage area, and parameter passing area. According to the SPU ABI,
1091 // we minimally need space for [LR] and [SP]
1092 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1094 // Set up a copy of the stack pointer for use loading and storing any
1095 // arguments that may not fit in the registers available for argument
1097 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1099 // Figure out which arguments are going to go in registers, and which in
1101 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1102 unsigned ArgRegIdx = 0;
1104 // Keep track of registers passing arguments
1105 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1106 // And the arguments passed on the stack
1107 SmallVector<SDOperand, 8> MemOpChains;
1109 for (unsigned i = 0; i != NumOps; ++i) {
1110 SDOperand Arg = Op.getOperand(5+2*i);
1112 // PtrOff will be used to store the current argument to the stack if a
1113 // register cannot be found for it.
1114 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1115 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1117 switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
1122 if (ArgRegIdx != NumArgRegs) {
1123 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
1131 if (ArgRegIdx != NumArgRegs) {
1132 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
1142 if (ArgRegIdx != NumArgRegs) {
1143 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
1152 // Update number of stack bytes actually used, insert a call sequence start
1153 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1154 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1156 if (!MemOpChains.empty()) {
1157 // Adjust the stack pointer for the stack arguments.
1158 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1159 &MemOpChains[0], MemOpChains.size());
1162 // Build a sequence of copy-to-reg nodes chained together with token chain
1163 // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
1171 std::vector<MVT::ValueType> NodeTys;
1172 NodeTys.push_back(MVT::Other); // Returns a chain
1173 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1175 SmallVector<SDOperand, 8> Ops;
1176 unsigned CallOpc = SPUISD::CALL;
1178 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1179 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1180 // node so that legalize doesn't hack it.
1181 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1182 GlobalValue *GV = G->getGlobal();
1183 unsigned CalleeVT = Callee.getValueType();
1185 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1186 // style calls, otherwise, external symbols are BRASL calls.
    // This may be an unsafe assumption for JIT and really large compilation
    // units.
1190 if (GV->isDeclaration()) {
1191 Callee = DAG.getGlobalAddress(GV, CalleeVT);
    } else {
      Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
                           DAG.getTargetGlobalAddress(GV, CalleeVT),
                           DAG.getConstant(0, PtrVT));
    }
1197 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1198 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1199 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1200 // If this is an absolute destination address that appears to be a legal
1201 // local store address, use the munged value.
1202 Callee = SDOperand(Dest, 0);
1204 Ops.push_back(Chain);
1205 Ops.push_back(Callee);
1207 // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1210 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1211 RegsToPass[i].second.getValueType()));
  if (InFlag.Val)
    Ops.push_back(InFlag);
1215 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1216 InFlag = Chain.getValue(1);
1218 SDOperand ResultVals[3];
1219 unsigned NumResults = 0;
1222 // If the call has results, copy the values out of the ret val registers.
1223 switch (Op.Val->getValueType(0)) {
1224 default: assert(0 && "Unexpected ret value!");
1225 case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
1228 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1229 ResultVals[0] = Chain.getValue(0);
1230 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1231 Chain.getValue(2)).getValue(1);
1232 ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
1243 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1244 ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
1250 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1251 InFlag).getValue(1);
1252 ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
1261 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1262 InFlag).getValue(1);
1263 ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }
1269 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1270 DAG.getConstant(NumStackBytes, PtrVT));
1271 NodeTys.push_back(MVT::Other);
1273 // If the function returns void, just return the chain.
1274 if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
1278 ResultVals[NumResults++] = Chain;
1279 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1280 ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}
static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1286 SmallVector<CCValAssign, 16> RVLocs;
1287 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1288 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1289 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1290 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1292 // If this is the first return lowered for this function, add the regs to the
1293 // liveout set for the function.
1294 if (DAG.getMachineFunction().liveout_empty()) {
1295 for (unsigned i = 0; i != RVLocs.size(); ++i)
1296 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;
1302 // Copy the result values into the output registers.
1303 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1304 CCValAssign &VA = RVLocs[i];
1305 assert(VA.isRegLoc() && "Can only return in registers!");
1306 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1307 Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}
1317 //===----------------------------------------------------------------------===//
1318 // Vector related lowering:
1319 //===----------------------------------------------------------------------===//
1321 static ConstantSDNode *
1322 getVecImm(SDNode *N) {
1323 SDOperand OpVal(0, 0);
1325 // Check to see if this buildvec has a single non-undef value in its elements.
1326 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1327 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
1330 else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
1335 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}
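/// The helpers below recognize BUILD_VECTOR splats whose value fits one of the
/// SPU's immediate-operand encodings (unsigned 18-bit, signed 16-bit, 10-bit
/// and 8-bit forms, plus the ILHU upper-halfword form).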
1343 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
1346 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1347 MVT::ValueType ValueType) {
1348 if (ConstantSDNode *CN = getVecImm(N)) {
1349 uint64_t Value = CN->getValue();
1350 if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}
1357 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
1360 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1361 MVT::ValueType ValueType) {
1362 if (ConstantSDNode *CN = getVecImm(N)) {
1363 if (ValueType == MVT::i32) {
1364 int Value = (int) CN->getValue();
1365 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1367 if (Value == SExtValue)
1368 return DAG.getConstant(Value, ValueType);
1369 } else if (ValueType == MVT::i16) {
1370 short Value = (short) CN->getValue();
1371 int SExtValue = ((int) Value << 16) >> 16;
1373 if (Value == (short) SExtValue)
1374 return DAG.getConstant(Value, ValueType);
1375 } else if (ValueType == MVT::i64) {
1376 int64_t Value = CN->getValue();
1377 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1379 if (Value == SExtValue)
        return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}
1387 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
1390 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1391 MVT::ValueType ValueType) {
1392 if (ConstantSDNode *CN = getVecImm(N)) {
1393 int Value = (int) CN->getValue();
1394 if ((ValueType == MVT::i32 && isS10Constant(Value))
1395 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}
1402 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
1406 /// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
1409 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1410 MVT::ValueType ValueType) {
1411 if (ConstantSDNode *CN = getVecImm(N)) {
1412 int Value = (int) CN->getValue();
1413 if (ValueType == MVT::i16
1414 && Value <= 0xffff /* truncated from uint64_t */
1415 && ((short) Value >> 8) == ((short) Value & 0xff))
1416 return DAG.getConstant(Value & 0xff, ValueType);
1417 else if (ValueType == MVT::i8
1418 && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}
1425 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
1428 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1429 MVT::ValueType ValueType) {
1430 if (ConstantSDNode *CN = getVecImm(N)) {
1431 uint64_t Value = CN->getValue();
1432 if ((ValueType == MVT::i32
1433 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1434 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}
1441 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1442 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1443 if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1451 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1452 if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}
1459 // If this is a vector of constants or undefs, get the bits. A bit in
1460 // UndefBits is set if the corresponding element of the vector is an
1461 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1462 // zero. Return true if this is not an array of constants, false if it is.
1464 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1465 uint64_t UndefBits[2]) {
1466 // Start with zero'd results.
1467 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1469 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1470 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1471 SDOperand OpVal = BV->getOperand(i);
1473 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1474 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1476 uint64_t EltBits = 0;
1477 if (OpVal.getOpcode() == ISD::UNDEF) {
1478 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1479 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1482 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1483 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1484 const APFloat &apf = CN->getValueAPF();
1485 EltBits = (CN->getValueType(0) == MVT::f32
1486 ? FloatToBits(apf.convertToFloat())
1487 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }
1496 //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
1501 /// If this is a splat (repetition) of a value across the whole vector, return
1502 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1503 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1504 /// SplatSize = 1 byte.
1505 static bool isConstantSplat(const uint64_t Bits128[2],
1506 const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
1510 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1511 // the same as the lower 64-bits, ignoring undefs.
1512 uint64_t Bits64 = Bits128[0] | Bits128[1];
1513 uint64_t Undef64 = Undef128[0] & Undef128[1];
1514 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1515 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1516 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1517 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1519 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1520 if (MinSplatBits < 64) {
1522 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1525 if (MinSplatBits < 32) {
1527 // If the top 16-bits are different than the lower 16-bits, ignoring
1528 // undefs, we have an i32 splat.
1529 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1530 if (MinSplatBits < 16) {
1531 // If the top 8-bits are different than the lower 8-bits, ignoring
1532 // undefs, we have an i16 splat.
1533 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1534 // Otherwise, we have an 8-bit splat.
1535 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1536 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
1556 SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
1565 // If this is a case we can't handle, return null and let the default
1566 // expansion code take care of it. If we CAN select this case, and if it
1567 // selects to a single instruction, return Op. Otherwise, if we can codegen
1568 // this case more efficiently than a constant pool load, lower it to the
1569 // sequence of ops that should be used.
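// For example, a v8i16 splat of a small constant can be materialized with a
// single immediate load (an il/ilh-style instruction) rather than a constant
// pool load; isConstantSplat() below determines which of these cases applies.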
1570 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1571 MVT::ValueType VT = Op.getValueType();
1572 // If this is a vector of constants or undefs, get the bits. A bit in
1573 // UndefBits is set if the corresponding element of the vector is an
1574 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
1577 uint64_t UndefBits[2];
1578 uint64_t SplatBits, SplatUndef;
  int SplatSize;

  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1581 || !isConstantSplat(VectorBits, UndefBits,
1582 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1583 SplatBits, SplatUndef, SplatSize))
1584 return SDOperand(); // Not a constant vector, not a splat.

  switch (VT) {
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
1590 assert(SplatSize == 4
1591 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1592 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1593 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1594 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1595 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
1600 assert(SplatSize == 8
1601 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1602 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1603 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1604 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1605 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
1613 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1614 return DAG.getNode(ISD::BIT_CONVERT, VT,
1615 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1623 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
1626 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
1630 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1631 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
1635 uint32_t upper = uint32_t(val >> 32);
1636 uint32_t lower = uint32_t(val);

    if (val != 0) {
      SDOperand LO32;
      SDOperand HI32;
      SmallVector<SDOperand, 16> ShufBytes;
1643 bool upper_special, lower_special;
1645 // NOTE: This code creates common-case shuffle masks that can be easily
1646 // detected as common expressions. It is not attempting to create highly
1647 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1649 // Detect if the upper or lower half is a special shuffle mask pattern:
1650 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1651 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1653 // Create lower vector if not a special pattern
1654 if (!lower_special) {
1655 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1656 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1657 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1658 LO32C, LO32C, LO32C, LO32C));
1661 // Create upper vector if not a special pattern
1662 if (!upper_special) {
1663 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1664 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1665 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1666 HI32C, HI32C, HI32C, HI32C));
1669 // If either upper or lower are special, then the two input operands are
1670 // the same (basically, one of them is a "don't care")
      if (lower_special)
        LO32 = HI32;
      if (upper_special)
        HI32 = LO32;

      if (lower_special && upper_special) {
1676 // Unhappy situation... both upper and lower are special, so punt with
1677 // a target constant:
1678 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1679 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                  Zero, Zero);
      }

      for (int i = 0; i < 4; ++i) {
1684 for (int j = 0; j < 4; ++j) {
          unsigned val = 0;
          bool process_upper, process_lower;
1689 process_upper = (upper_special && (i & 1) == 0);
1690 process_lower = (lower_special && (i & 1) == 1);
1692 if (process_upper || process_lower) {
1693 if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val = 0x80;
1696 else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val = 0xc0;
1699 else if ((process_upper && upper == 0x80000000)
1700 || (process_lower && lower == 0x80000000))
1701 val = (j == 0 ? 0xe0 : 0x80);
          } else {
            val = i * 4 + j + ((i & 1) * 16);
          }
          ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
        }
      }
1709 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1710 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1711 &ShufBytes[0], ShufBytes.size()));
    } else {
      // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1714 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1715 return DAG.getNode(ISD::BIT_CONVERT, VT,
1716 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     Zero, Zero, Zero, Zero));
    }
  }
  }

  return SDOperand();
}
1725 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1726 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1727 /// permutation vector, V3, is monotonically increasing with one "exception"
1728 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1729 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1730 /// In either case, the net result is going to eventually invoke SHUFB to
1731 /// permute/shuffle the bytes from V1 and V2.
1733 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1734 /// generate a control word for byte/halfword/word insertion. This takes care
1735 /// of a single element move from V2 into V1.
1737 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
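/// For example (illustrative only): for v4i32, a mask of (0, 1, 6, 3) is
/// monotonic except for element 2, which names element 2 of V2 (mask values
/// 4-7 refer to V2), so it qualifies for the INSERT_MASK path. A mask such as
/// (0, 2, 1, 3) does not, and is expanded into a full 16-byte SHUFB mask.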
1738 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1739 SDOperand V1 = Op.getOperand(0);
1740 SDOperand V2 = Op.getOperand(1);
1741 SDOperand PermMask = Op.getOperand(2);
1743 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1745 // If we have a single element being moved from V2 into V1, this can be handled
1746 // using the C*[DX] compute mask instructions, but the vector elements have
1747 // to be monotonically increasing with one exception element.
1748 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1749 unsigned EltsFromV2 = 0;
1751 unsigned V2EltIdx0 = 0;
1752 unsigned CurrElt = 0;
1753 bool monotonic = true;
1754 if (EltVT == MVT::i8)
1756 else if (EltVT == MVT::i16)
1758 else if (EltVT == MVT::i32)
1761 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1763 for (unsigned i = 0, e = PermMask.getNumOperands();
1764 EltsFromV2 <= 1 && monotonic && i != e;
1767 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1770 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1772 if (SrcElt >= V2EltIdx0) {
1774 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1775 } else if (CurrElt != SrcElt) {
1782 if (EltsFromV2 == 1 && monotonic) {
1783 // Compute mask and shuffle
1784 MachineFunction &MF = DAG.getMachineFunction();
1785 SSARegMap *RegMap = MF.getSSARegMap();
1786 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1787 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1788 // Initialize temporary register to 0
1789 SDOperand InitTempReg =
1790 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1791 // Copy register's contents as index in INSERT_MASK:
1792 SDOperand ShufMaskOp =
1793 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1794 DAG.getTargetConstant(V2Elt, MVT::i32),
1795 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1796 // Use shuffle mask in SHUFB synthetic instruction:
1797 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1799 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1800 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
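// (e.g., for v4i32, BytesPerElement is 4 and a mask element of 5 expands to
// the byte indices 20, 21, 22 and 23.)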
1802 SmallVector<SDOperand, 16> ResultMask;
1803 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1805 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1808 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1810 for (unsigned j = 0; j != BytesPerElement; ++j) {
1811 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1816 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1817 &ResultMask[0], ResultMask.size());
1818 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1822 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1823 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1825 if (Op0.Val->getOpcode() == ISD::Constant) {
1826 // For a constant, build the appropriate constant vector, which will
1827 // eventually simplify to a vector register load.
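// (e.g., (scalar_to_vector (i32 7)) becomes (build_vector 7, 7, 7, 7) here;
// splatting is safe because the elements other than 0 are undefined anyway.)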
1829 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1830 SmallVector<SDOperand, 16> ConstVecValues;
1834 // Create a constant vector:
1835 switch (Op.getValueType()) {
1836 default: assert(0 && "Unexpected constant value type in "
1837 "LowerSCALAR_TO_VECTOR");
1838 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1839 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1840 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1841 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1842 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1843 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1846 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1847 for (size_t j = 0; j < n_copies; ++j)
1848 ConstVecValues.push_back(CValue);
1850 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1851 &ConstVecValues[0], ConstVecValues.size());
1853 // Otherwise, promote the scalar value into a vector register:
1854 switch (Op0.getValueType()) {
1855 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1862 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1869 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1870 switch (Op.getValueType()) {
1872 SDOperand rA = Op.getOperand(0);
1873 SDOperand rB = Op.getOperand(1);
1874 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1875 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1876 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1877 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1879 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1883 // Multiply two v8i16 vectors (pipeline friendly version):
1884 // a) multiply lower halves, mask off the upper 16 bits of each 32-bit product
1885 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1886 // c) Use SELB to select upper and lower halves from the intermediate results
1888 // NOTE: We really want to move the FSMBI to earlier to actually get the
1889 // dual-issue. This code does manage to do this, even if it's a little on
1890 // the convoluted side.
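// Illustrative per-word arithmetic for the sequence below (a and b are i32
// words, each holding two i16 lanes; this is a sketch, not extra lowering):
//   lo_word = mpy(a, b);               // 32-bit product of the low halfwords
//   hi_word = mpyhh(a, b) << 16;       // product of the high halfwords, moved up
//   result  ~= selb(lo_word, hi_word, fsmbi(0xcccc));
// so each i16 lane ends up holding the low 16 bits of its own 16x16 product.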
1892 MachineFunction &MF = DAG.getMachineFunction();
1893 SSARegMap *RegMap = MF.getSSARegMap();
1894 SDOperand Chain = Op.getOperand(0);
1895 SDOperand rA = Op.getOperand(0);
1896 SDOperand rB = Op.getOperand(1);
1897 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1898 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1901 DAG.getCopyToReg(Chain, FSMBIreg,
1902 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1903 DAG.getConstant(0xcccc, MVT::i32)));
1906 DAG.getCopyToReg(FSMBOp, HiProdReg,
1907 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1909 SDOperand HHProd_v4i32 =
1910 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1911 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1913 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1914 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1915 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1916 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1918 DAG.getConstant(16, MVT::i16))),
1919 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1922 // This M00sE is N@stI! (apologies to Monty Python)
1924 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1925 // is to break it all apart, sign extend, and reassemble the various
1926 // intermediate products.
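// For instance, the byte product 0x7f * 0x03 is formed as the 16-bit product
// 0x007f * 0x0003 = 0x017d, of which only the low byte, 0x7d, is kept.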
1928 MachineFunction &MF = DAG.getMachineFunction();
1929 SSARegMap *RegMap = MF.getSSARegMap();
1930 SDOperand Chain = Op.getOperand(0);
1931 SDOperand rA = Op.getOperand(0);
1932 SDOperand rB = Op.getOperand(1);
1933 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1934 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1936 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1937 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1938 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1941 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1942 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1943 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1945 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1947 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1950 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1951 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1953 SDOperand FSMBdef_2222 =
1954 DAG.getCopyToReg(Chain, FSMBreg_2222,
1955 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1956 DAG.getConstant(0x2222, MVT::i32)));
1958 SDOperand FSMBuse_2222 =
1959 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1961 SDOperand LoProd_1 =
1962 DAG.getCopyToReg(Chain, LoProd_reg,
1963 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1966 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1969 DAG.getNode(ISD::AND, MVT::v4i32,
1970 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1971 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1972 LoProdMask, LoProdMask,
1973 LoProdMask, LoProdMask));
1976 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1980 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1981 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1984 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1985 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1986 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1988 SDOperand HHProd_1 =
1989 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1990 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1993 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1996 DAG.getCopyToReg(Chain, HiProd_reg,
1997 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1999 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2003 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2004 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2006 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2007 DAG.getNode(ISD::OR, MVT::v4i32,
2012 cerr << "CellSPU: Unknown vector multiplication, got "
2013 << MVT::getValueTypeString(Op.getValueType())
2022 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2023 MachineFunction &MF = DAG.getMachineFunction();
2024 SSARegMap *RegMap = MF.getSSARegMap();
2026 SDOperand A = Op.getOperand(0);
2027 SDOperand B = Op.getOperand(1);
2028 unsigned VT = Op.getValueType();
2030 unsigned VRegBR, VRegC;
2032 if (VT == MVT::f32) {
2033 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2034 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2036 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2037 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2039 // TODO: make sure we're feeding FPInterp the right arguments
2040 // Right now: fi B, frest(B)
2043 // (Floating Interpolate (FP Reciprocal Estimate B))
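// Illustrative math for the DAG built below: with r = fi(B, frest(B)) ~= 1/B,
//   q0     = A * r
//   result = q0 + r * (A - B * q0)
// i.e. the initial quotient estimate plus one correction term.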
2045 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2046 DAG.getNode(SPUISD::FPInterp, VT, B,
2047 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2049 // Computes A * BRcpl and stores in a temporary register
2051 DAG.getCopyToReg(BRcpl, VRegC,
2052 DAG.getNode(ISD::FMUL, VT, A,
2053 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2054 // What does the Chain variable do? It's magic!
2055 // TODO: set Chain = Op(0).getEntryNode()
2057 return DAG.getNode(ISD::FADD, VT,
2058 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2059 DAG.getNode(ISD::FMUL, VT,
2060 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2061 DAG.getNode(ISD::FSUB, VT, A,
2062 DAG.getNode(ISD::FMUL, VT, B,
2063 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2066 // Expands double-precision FDIV
2067 // Expects two doubles as inputs X and Y, performs a floating-point
2068 // reciprocal estimate followed by three Newton-Raphson iterations
2069 // to increase accuracy.
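// (For reference, a Newton-Raphson reciprocal step refines x ~= 1/Y via
// x' = x * (2 - Y*x); three such steps from the frest estimate are what the
// description above refers to.)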
2070 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2071 // MachineFunction &MF = DAG.getMachineFunction();
2072 // SSARegMap *RegMap = MF.getSSARegMap();
2074 // SDOperand X = Op.getOperand(0);
2075 // SDOperand Y = Op.getOperand(1);
2078 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2079 unsigned VT = Op.getValueType();
2080 SDOperand N = Op.getOperand(0);
2081 SDOperand Elt = Op.getOperand(1);
2082 SDOperand ShufMask[16];
2083 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2085 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2087 int EltNo = (int) C->getValue();
2090 if (VT == MVT::i8 && EltNo >= 16)
2091 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2092 else if (VT == MVT::i16 && EltNo >= 8)
2093 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2094 else if (VT == MVT::i32 && EltNo >= 4)
2095 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2096 else if (VT == MVT::i64 && EltNo >= 2)
2097 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2099 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2100 // i32 and i64: Element 0 is the preferred slot
2101 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2104 // Need to generate shuffle mask and extract:
2105 int prefslot_begin, prefslot_end;
2106 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2110 prefslot_begin = prefslot_end = 3;
2114 prefslot_begin = 2; prefslot_end = 3;
2118 prefslot_begin = 0; prefslot_end = 3;
2122 prefslot_begin = 0; prefslot_end = 7;
2127 for (int i = 0; i < 16; ++i) {
2128 // zero fill the upper part of the preferred slot; the remaining mask bytes
2129 // are don't-cares
2130 unsigned int mask_val;
2132 if (i <= prefslot_end) {
2134 ((i < prefslot_begin)
2136 : elt_byte + (i - prefslot_begin));
2138 ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
2140 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2143 SDOperand ShufMaskVec =
2144 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2146 sizeof(ShufMask) / sizeof(ShufMask[0]));
2148 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2149 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2150 N, N, ShufMaskVec));
2154 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2155 SDOperand VecOp = Op.getOperand(0);
2156 SDOperand ValOp = Op.getOperand(1);
2157 SDOperand IdxOp = Op.getOperand(2);
2158 MVT::ValueType VT = Op.getValueType();
2160 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2161 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2163 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2164 // Use $2 because it's always 16-byte aligned and it's available:
2165 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2168 DAG.getNode(SPUISD::SHUFB, VT,
2169 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2171 DAG.getNode(SPUISD::INSERT_MASK, VT,
2172 DAG.getNode(ISD::ADD, PtrVT,
2174 DAG.getConstant(CN->getValue(),
2180 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2181 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2183 assert(Op.getValueType() == MVT::i8);
2186 assert(0 && "Unhandled i8 math operator");
2190 // 8-bit subtraction: promote the arguments to 16 bits and truncate the result
2192 SDOperand N1 = Op.getOperand(1);
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2196 N1 = (N1.getOpcode() != ISD::Constant
2197 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2198 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2199 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2200 DAG.getNode(Opc, MVT::i16, N0, N1));
2204 SDOperand N1 = Op.getOperand(1);
2206 N0 = (N0.getOpcode() != ISD::Constant
2207 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2208 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2209 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2210 N1 = (N1.getOpcode() != ISD::Constant
2211 ? DAG.getNode(N1Opc, MVT::i16, N1)
2212 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2213 SDOperand ExpandArg =
2214 DAG.getNode(ISD::OR, MVT::i16, N0,
2215 DAG.getNode(ISD::SHL, MVT::i16,
2216 N0, DAG.getConstant(8, MVT::i16)));
2217 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2218 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2222 SDOperand N1 = Op.getOperand(1);
2224 N0 = (N0.getOpcode() != ISD::Constant
2225 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2226 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2227 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2231 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2232 DAG.getNode(Opc, MVT::i16, N0, N1));
2235 SDOperand N1 = Op.getOperand(1);
2237 N0 = (N0.getOpcode() != ISD::Constant
2238 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2239 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2240 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2241 N1 = (N1.getOpcode() != ISD::Constant
2242 ? DAG.getNode(N1Opc, MVT::i16, N1)
2243 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2244 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2245 DAG.getNode(Opc, MVT::i16, N0, N1));
2248 SDOperand N1 = Op.getOperand(1);
2250 N0 = (N0.getOpcode() != ISD::Constant
2251 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2252 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2253 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2254 N1 = (N1.getOpcode() != ISD::Constant
2255 ? DAG.getNode(N1Opc, MVT::i16, N1)
2256 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2257 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2258 DAG.getNode(Opc, MVT::i16, N0, N1));
2266 //! Lower byte immediate operations for v16i8 vectors:
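// For example, (or (v16i8 X), (build_vector 0x2a, ..., 0x2a)) is rewritten so
// that the splatted operand becomes a vector of target constants, allowing the
// byte-immediate pattern (orbi) to match with the immediate 42.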
2268 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2271 MVT::ValueType VT = Op.getValueType();
2273 ConstVec = Op.getOperand(0);
2274 Arg = Op.getOperand(1);
2275 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2276 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2277 ConstVec = ConstVec.getOperand(0);
2279 ConstVec = Op.getOperand(1);
2280 Arg = Op.getOperand(0);
2281 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2282 ConstVec = ConstVec.getOperand(0);
2287 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2288 uint64_t VectorBits[2];
2289 uint64_t UndefBits[2];
2290 uint64_t SplatBits, SplatUndef;
2293 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2294 && isConstantSplat(VectorBits, UndefBits,
2295 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2296 SplatBits, SplatUndef, SplatSize)) {
2297 SDOperand tcVec[16];
2298 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2299 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2301 // Turn the BUILD_VECTOR into a set of target constants:
2302 for (size_t i = 0; i < tcVecSize; ++i)
2305 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2306 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2313 //! Lower i32 multiplication
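// Illustrative scalar equivalent of the decomposition used below (assumed
// helper name, not part of this file):
//   unsigned mulLo32(unsigned a, unsigned b) {
//     unsigned lo  = (a & 0xffff) * (b & 0xffff);          // mpyu
//     unsigned hi1 = ((a >> 16) * (b & 0xffff)) << 16;     // mpyh a, b
//     unsigned hi2 = ((b >> 16) * (a & 0xffff)) << 16;     // mpyh b, a
//     return lo + hi1 + hi2;   // equals a*b modulo 2^32
//   }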
2314 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2318 cerr << "CellSPU: Unknown LowerMUL value type, got "
2319 << MVT::getValueTypeString(Op.getValueType())
2325 SDOperand rA = Op.getOperand(0);
2326 SDOperand rB = Op.getOperand(1);
2328 return DAG.getNode(ISD::ADD, MVT::i32,
2329 DAG.getNode(ISD::ADD, MVT::i32,
2330 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2331 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2332 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2339 //! Custom lowering for CTPOP (count population)
2341 Custom lowering code that counts the number of ones in the input
2342 operand. SPU has such an instruction, but it counts the number of
2343 ones in each byte, so the per-byte counts must then be accumulated.
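For example, for an i32 input the sequence below computes, in scalar terms
(illustrative only, with t initialised to the per-byte counts from CNTB):

  t = t + (t >> 16);
  t = t + (t >> 8);
  result = t & 0xff;

e.g. an input of 0x01020304 yields per-byte counts of 0x01010201 and a final
result of 5.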
2345 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2346 unsigned VT = Op.getValueType();
2347 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2351 SDOperand N = Op.getOperand(0);
2352 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2354 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2355 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2357 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2361 MachineFunction &MF = DAG.getMachineFunction();
2362 SSARegMap *RegMap = MF.getSSARegMap();
2364 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2366 SDOperand N = Op.getOperand(0);
2367 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2368 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2369 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2371 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2372 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2374 // CNTB_result becomes the chain to which the virtual register
2375 // CNTB_reg becomes associated:
2376 SDOperand CNTB_result =
2377 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2379 SDOperand CNTB_rescopy =
2380 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2382 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2384 return DAG.getNode(ISD::AND, MVT::i16,
2385 DAG.getNode(ISD::ADD, MVT::i16,
2386 DAG.getNode(ISD::SRL, MVT::i16,
2393 MachineFunction &MF = DAG.getMachineFunction();
2394 SSARegMap *RegMap = MF.getSSARegMap();
2396 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2397 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2399 SDOperand N = Op.getOperand(0);
2400 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2401 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2402 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2403 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2405 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2406 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2408 // CNTB_result becomes the chain to which all of the virtual registers
2409 // CNTB_reg, SUM1_reg become associated:
2410 SDOperand CNTB_result =
2411 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2413 SDOperand CNTB_rescopy =
2414 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2417 DAG.getNode(ISD::SRL, MVT::i32,
2418 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2421 DAG.getNode(ISD::ADD, MVT::i32,
2422 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2424 SDOperand Sum1_rescopy =
2425 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2428 DAG.getNode(ISD::SRL, MVT::i32,
2429 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2432 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2433 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2435 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2445 /// LowerOperation - Provide custom lowering hooks for some operations.
2448 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2450 switch (Op.getOpcode()) {
2452 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2453 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2454 cerr << "*Op.Val:\n";
2461 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2463 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2464 case ISD::ConstantPool:
2465 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2466 case ISD::GlobalAddress:
2467 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2468 case ISD::JumpTable:
2469 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2471 return LowerConstant(Op, DAG);
2472 case ISD::ConstantFP:
2473 return LowerConstantFP(Op, DAG);
2474 case ISD::FORMAL_ARGUMENTS:
2475 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2477 return LowerCALL(Op, DAG);
2479 return LowerRET(Op, DAG, getTargetMachine());
2488 return LowerI8Math(Op, DAG, Op.getOpcode());
2490 // Vector-related lowering.
2491 case ISD::BUILD_VECTOR:
2492 return LowerBUILD_VECTOR(Op, DAG);
2493 case ISD::SCALAR_TO_VECTOR:
2494 return LowerSCALAR_TO_VECTOR(Op, DAG);
2495 case ISD::VECTOR_SHUFFLE:
2496 return LowerVECTOR_SHUFFLE(Op, DAG);
2497 case ISD::EXTRACT_VECTOR_ELT:
2498 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2499 case ISD::INSERT_VECTOR_ELT:
2500 return LowerINSERT_VECTOR_ELT(Op, DAG);
2502 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2506 return LowerByteImmed(Op, DAG);
2508 // Vector and i8 multiply:
2510 if (MVT::isVector(Op.getValueType()))
2511 return LowerVectorMUL(Op, DAG);
2512 else if (Op.getValueType() == MVT::i8)
2513 return LowerI8Math(Op, DAG, Op.getOpcode());
2515 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2518 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2519 return LowerFDIVf32(Op, DAG);
2520 // else if (Op.getValueType() == MVT::f64)
2521 // return LowerFDIVf64(Op, DAG);
2523 assert(0 && "Calling FDIV on unsupported MVT");
2526 return LowerCTPOP(Op, DAG);
2532 //===----------------------------------------------------------------------===//
2533 // Other Lowering Code
2534 //===----------------------------------------------------------------------===//
2537 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2538 MachineBasicBlock *BB)
2543 //===----------------------------------------------------------------------===//
2544 // Target Optimization Hooks
2545 //===----------------------------------------------------------------------===//
2548 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2551 TargetMachine &TM = getTargetMachine();
2552 SelectionDAG &DAG = DCI.DAG;
2554 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2556 switch (N->getOpcode()) {
2559 // Look for obvious optimizations for shift left:
2560 // a) Replace 0 << V with 0
2561 // b) Replace V << 0 with V
2563 // N.B.: LLVM will generate an undef node if the shift amount is greater than
2564 // 15 (e.g., V << 16), which will naturally trigger an assert.
2567 case SPU::SHLQBIIvec:
2569 case SPU::ROTHIr16_i32:
2571 case SPU::ROTIr32_i16:
2572 case SPU::ROTQBYIvec:
2573 case SPU::ROTQBYBIvec:
2574 case SPU::ROTQBIIvec:
2575 case SPU::ROTHMIr16:
2577 case SPU::ROTQMBYIvec: {
2578 if (N0.getOpcode() == ISD::Constant) {
2579 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2580 if (C->getValue() == 0) // 0 << V -> 0.
2584 SDOperand N1 = N->getOperand(1);
2585 if (N1.getOpcode() == ISD::Constant) {
2586 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2587 if (C->getValue() == 0) // V << 0 -> V
2598 //===----------------------------------------------------------------------===//
2599 // Inline Assembly Support
2600 //===----------------------------------------------------------------------===//
2602 /// getConstraintType - Given a constraint letter, return the type of
2603 /// constraint it is for this target.
2604 SPUTargetLowering::ConstraintType
2605 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2606 if (ConstraintLetter.size() == 1) {
2607 switch (ConstraintLetter[0]) {
2614 return C_RegisterClass;
2617 return TargetLowering::getConstraintType(ConstraintLetter);
2620 std::pair<unsigned, const TargetRegisterClass*>
2621 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2622 MVT::ValueType VT) const
2624 if (Constraint.size() == 1) {
2625 // GCC RS6000-style constraint letters (carried over from the PPC backend)
2626 switch (Constraint[0]) {
2630 return std::make_pair(0U, SPU::R64CRegisterClass);
2631 return std::make_pair(0U, SPU::R32CRegisterClass);
2634 return std::make_pair(0U, SPU::R32FPRegisterClass);
2635 else if (VT == MVT::f64)
2636 return std::make_pair(0U, SPU::R64FPRegisterClass);
2639 return std::make_pair(0U, SPU::GPRCRegisterClass);
2643 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2647 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2649 uint64_t &KnownZero,
2651 const SelectionDAG &DAG,
2652 unsigned Depth ) const {
2657 // LowerAsmOperandForConstraint
2659 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2660 char ConstraintLetter,
2661 std::vector<SDOperand> &Ops,
2662 SelectionDAG &DAG) {
2663 // Default, for the time being, to the base class handler
2664 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2667 /// isLegalAddressImmediate - Return true if the integer value can be used
2668 /// as the offset of the target addressing mode.
2669 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2670 // SPU's local store is 256K (2^18), so offsets must lie within +/- 2^18:
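// (e.g., an offset of 0x20000 is accepted here, while 0x40000 is rejected.)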
2671 return (V > -(1 << 18) && V < (1 << 18) - 1);
2674 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {