1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by a team from the Computer Systems Research
6 // Department at The Aerospace Corporation.
8 // See README.txt for details.
10 //===----------------------------------------------------------------------===//
12 // This file implements the SPUTargetLowering class.
14 //===----------------------------------------------------------------------===//
16 #include "SPURegisterNames.h"
17 #include "SPUISelLowering.h"
18 #include "SPUTargetMachine.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/CodeGen/SSARegMap.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
38 // Used in getTargetNodeName() below
40 std::map<unsigned, const char *> node_names;
42 //! MVT::ValueType mapping to useful data for Cell SPU
43 struct valtype_map_s {
44 const MVT::ValueType valtype;
45 const int prefslot_byte;
48 const valtype_map_s valtype_map[] = {
59 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
61 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
62 const valtype_map_s *retval = 0;
64 for (size_t i = 0; i < n_valtype_map; ++i) {
65 if (valtype_map[i].valtype == VT) {
66 retval = valtype_map + i;
73 cerr << "getValueTypeMapEntry returns NULL for "
74 << MVT::getValueTypeString(VT)
83 //! Predicate that returns true if operand is a memory target
85 \arg Op Operand to test
86 \return true if the operand is a memory target (i.e., global
87 address, external symbol, constant pool) or an existing D-Form
90 bool isMemoryOperand(const SDOperand &Op)
92 const unsigned Opc = Op.getOpcode();
93 return (Opc == ISD::GlobalAddress
94 || Opc == ISD::GlobalTLSAddress
95 || Opc == ISD::FrameIndex
96 || Opc == ISD::JumpTable
97 || Opc == ISD::ConstantPool
98 || Opc == ISD::ExternalSymbol
99 || Opc == ISD::TargetGlobalAddress
100 || Opc == ISD::TargetGlobalTLSAddress
101 || Opc == ISD::TargetFrameIndex
102 || Opc == ISD::TargetJumpTable
103 || Opc == ISD::TargetConstantPool
104 || Opc == ISD::TargetExternalSymbol
105 || Opc == SPUISD::DFormAddr);
//! SPUTargetLowering constructor.
/*!
 Configures DAG lowering for the Cell SPU target: registers the legal
 register classes, then records a legalize action (Legal / Expand /
 Promote / Custom) for every ISD opcode + value-type pair that needs
 non-default handling.
 NOTE(review): this copy of the file appears to have some interior lines
 elided (e.g. the remainder of the member-initializer list and a few
 closing braces); the statements below are annotated as-is.
 */
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  // the instruction selector (SPUDAGToDAGISel).
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions.
  // (NOTE(review): comment originally said "PowerPC" — copied verbatim
  // from the PPC backend.)
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  // NOTE(review): FP_TO_UINT/i32 is overridden to Promote further down.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): both calls repeat settings already made above in this ctor.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);

  // v16i8 logical/multiply ops get sharper, type-specific custom lowering:
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
373 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
375 if (node_names.empty()) {
376 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
377 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
378 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
379 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
380 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
381 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
382 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
383 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
384 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
385 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
386 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
387 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
388 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
390 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
394 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
395 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
396 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
397 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
398 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
399 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
400 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
401 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
402 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
403 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
404 "SPUISD::ROTBYTES_RIGHT_Z";
405 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
406 "SPUISD::ROTBYTES_RIGHT_S";
407 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
409 "SPUISD::ROTBYTES_LEFT_CHAINED";
410 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
411 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
412 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
413 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
414 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
415 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
418 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
420 return ((i != node_names.end()) ? i->second : 0);
423 //===----------------------------------------------------------------------===//
424 // Calling convention code:
425 //===----------------------------------------------------------------------===//
427 #include "SPUGenCallingConv.inc"
429 //===----------------------------------------------------------------------===//
430 // LowerOperation implementation
431 //===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 NOTE(review): this copy of the function has interior lines elided (several
 local declarations, assert lead-ins and closing braces are not shown);
 comments below annotate the visible statements only.
 */
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();             // address operand
  SDOperand the_chain = LN->getChain();           // incoming chain token
  MVT::ValueType VT = LN->getLoadedVT();          // in-memory value type
  MVT::ValueType OpVT = Op.Val->getValueType(0);  // result (register) type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
    ExtType = ISD::ZEXTLOAD;

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand rot_op, rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    // If the base address is reg+constant, split out the constant byte
    // offset; the low nibble becomes the rotate amount into the preferred
    // slot, the rest stays in the D-Form displacement.
    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
             && "LowerLOAD: ISD::ADD operand 1 is not constant");

      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
      // (else branch: no constant offset, rotate purely by preferred slot)
      c_rotamt = -vtm->prefslot_byte;

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
        if (isMemoryOperand(ptrp))

          // Return modified D-Form address for pointer:
          ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                             ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));

        // Non-extending case: plain aligned load.
        return DAG.getLoad(VT, LN->getChain(), ptrp,
                           LN->getSrcValue(), LN->getSrcValueOffset(),
                           LN->isVolatile(), 16);

        // Extending case: preserve the extension type.
        return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
                              LN->getSrcValueOffset(), OpVT,
                              LN->isVolatile(), 16);

    // Realign the base pointer, with a D-Form address
    if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
      basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                          ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));

    // Load the whole 16-byte chunk, then rotate the desired element into
    // the preferred slot.
    rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);
    the_chain = rot_op.getValue(1);
    rotamt = DAG.getConstant(c_rotamt, MVT::i16);

    SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

    result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
    the_chain = result.getValue(1);

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      // Result type matches memory type (or plain anyext): extract element 0
      // directly in the appropriate scalar VT.
      scalarvts = DAG.getVTList(VT, MVT::Other);

      scalarvts = DAG.getVTList(OpVT, MVT::Other);

      result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),

      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);

    // Handle the sign and zero-extending loads for i1 and i8:

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else if (ExtType == ISD::ZEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);

      result = DAG.getNode(NewOpC, OpVT, result);

    // Wrap (result, chain) in an LDRESULT node so both values are returned.
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };

    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);

    // Misaligned 16-byte load:
    if (basep.getOpcode() == ISD::LOAD) {
      LN = cast<LoadSDNode>(basep);
      if (LN->getAlignment() == 16) {
        // We can verify that we're really loading from a 16-byte aligned
        // chunk. Encapsulate basep as a D-Form address and return a new
        // load.
        basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                            DAG.getConstant(0, PtrVT));

        return DAG.getLoad(VT, LN->getChain(), basep,
                           LN->getSrcValue(), LN->getSrcValueOffset(),
                           LN->isVolatile(), 16);

        return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                              LN->getSrcValue(), LN->getSrcValueOffset(),
                              OpVT, LN->isVolatile(), 16);

    // Catch all other cases where we can't guarantee that we have a
    // 16-byte aligned entity, which means resorting to an X-form
    // address scheme (hi+lo parts added at runtime).
    SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
    SDOperand loOp = DAG.getNode(SPUISD::Lo, VT, basep, ZeroOffs);
    SDOperand hiOp = DAG.getNode(SPUISD::Hi, VT, basep, ZeroOffs);

    ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

    SDOperand alignLoad =
      DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                  LN->getSrcValue(), LN->getSrcValueOffset(),
                  LN->isVolatile(), 16);

    // Shuffle mask that moves the addressed element into position.
    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

    result = DAG.getNode(SPUISD::SHUFB, opVecVT,

                         DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

    result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };

    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);

  // Pre/post-indexed modes are not implemented; report and fail.
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.

 NOTE(review): this copy of the function has interior lines elided
 (local declarations, some if/else framing and closing braces); comments
 annotate the visible statements only.
 */
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();               // value being stored
  MVT::ValueType VT = Value.getValueType();
  // Memory type: for truncating stores, the narrower stored type.
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  unsigned alignment = SN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();

    // Split a reg+constant base address into register and byte offset.
    if (basep.getOpcode() == ISD::ADD) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
             && "LowerSTORE: ISD::ADD operand 1 is not constant");

      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT;

      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));

      stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer,
    // to force a register load with the address; basep is the actual
    // dform addr offs($reg).
    ptrOp = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant(0, PtrVT));
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                        ptrOp, DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();

        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities.
      theValue = theValue.getOperand(0);

    // Build the shuffle mask that drops the new element into its slot
    // within the loaded 16-byte block.
    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,

                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),

                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    // Store the merged 16-byte block back.
    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

  // Pre/post-indexed modes are not implemented; report and fail.
  // NOTE(review): the message below says "LowerLOAD" — copy/paste from the
  // load path; should read "LowerSTORE"/"StoreSDNode" (string left unchanged
  // here, as this edit only touches comments).
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) SN->getAddressingMode() << "\n";
756 /// Generate the address of a constant pool entry.
758 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
759 MVT::ValueType PtrVT = Op.getValueType();
760 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
761 Constant *C = CP->getConstVal();
762 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
763 const TargetMachine &TM = DAG.getTarget();
764 SDOperand Zero = DAG.getConstant(0, PtrVT);
766 if (TM.getRelocationModel() == Reloc::Static) {
767 if (!ST->usingLargeMem()) {
768 // Just return the SDOperand with the constant pool address in it.
771 // Generate hi/lo address pair
772 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
773 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
775 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
780 "LowerConstantPool: Relocation model other than static not supported.");
785 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
786 MVT::ValueType PtrVT = Op.getValueType();
787 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
788 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
789 SDOperand Zero = DAG.getConstant(0, PtrVT);
790 const TargetMachine &TM = DAG.getTarget();
792 if (TM.getRelocationModel() == Reloc::Static) {
793 if (!ST->usingLargeMem()) {
794 // Just return the SDOperand with the jump table address in it.
797 // Generate hi/lo address pair
798 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
799 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
801 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
806 "LowerJumpTable: Relocation model other than static not supported.");
811 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
812 MVT::ValueType PtrVT = Op.getValueType();
813 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
814 GlobalValue *GV = GSDN->getGlobal();
815 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
816 SDOperand Zero = DAG.getConstant(0, PtrVT);
817 const TargetMachine &TM = DAG.getTarget();
819 if (TM.getRelocationModel() == Reloc::Static) {
820 if (!ST->usingLargeMem()) {
821 // Generate a local store address
824 // Generate hi/lo address pair
825 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
826 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
828 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
831 cerr << "LowerGlobalAddress: Relocation model other than static not "
840 //! Custom lower i64 integer constants
842 This code inserts all of the necessary juggling that needs to occur to load
843 a 64-bit constant into a register.
846 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
847 unsigned VT = Op.getValueType();
848 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
850 if (VT == MVT::i64) {
851 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
852 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
853 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
856 cerr << "LowerConstant: unhandled constant type "
857 << MVT::getValueTypeString(VT)
866 //! Custom lower single precision floating point constants
868 "float" immediates can be lowered as if they were unsigned 32-bit integers.
869 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
873 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
874 unsigned VT = Op.getValueType();
875 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
878 "LowerConstantFP: Node is not ConstantFPSDNode");
880 const APFloat &apf = FP->getValueAPF();
882 if (VT == MVT::f32) {
883 return DAG.getNode(SPUISD::SFPConstant, VT,
884 DAG.getTargetConstantFP(apf.convertToFloat(), VT));
885 } else if (VT == MVT::f64) {
886 uint64_t dbits = DoubleToBits(apf.convertToDouble());
887 return DAG.getNode(ISD::BIT_CONVERT, VT,
888 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
/// LowerFORMAL_ARGUMENTS - lower incoming function arguments: copy register
/// arguments out of the SPU argument registers into virtual registers, and
/// load stack-passed (or register-exhausted) arguments from fixed frame
/// slots.  Also sets up VarArgsFrameIndex for va_start expansion.
/// NOTE(review): this copy is truncated — interior lines (switch framing per
/// argument type, closing braces) and the function's tail are not shown;
/// comments annotate the visible statements only.
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);              // incoming chain
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();  // first stack-arg offset
  unsigned ArgRegIdx = 0;                             // next argument register
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

      // default/unknown type: diagnose and (presumably) abort
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)

      // i8 case — NOTE(review): copied from an R16C register with an MVT::i8
      // copy; there is no i8 register class (see ctor comment) — confirm.
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);

      // i16 case
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);

      // i32 case
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);

      // i64 case
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);

      // f32 case
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);

      // f64 case
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);

      // vector case
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
        MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type

      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);

        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);

      ArgOffset += StackSlotSize;

    ArgValues.push_back(ArgVal);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,

    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
      MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
1040 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1041 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1043 if (!MemOps.empty())
1044 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1047 ArgValues.push_back(Root);
1049 // Return the new list of results.
1050 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1051 Op.Val->value_end());
1052 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1055 /// isLSAAddress - Return the immediate to use if the specified
1056 /// value is representable as a LSA address.
// Returns a constant node holding the word offset (address / 4) when Op is a
// constant that is 4-byte aligned and fits a signed 18-bit Local Store
// Address; otherwise returns 0 (null) so the caller falls back.
1057 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1058 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// NOTE(review): the null-check on C (on elided lines) must bail out before
// the dereference below — confirm it is still present in the full file.
1061 int Addr = C->getValue();
1062 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
// Shift left 14 then arithmetic-right 14: sign-extends from bit 17, so the
// comparison fails unless the value round-trips losslessly through 18 bits.
1063 (Addr << 14 >> 14) != Addr)
1064 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word (4-byte) index.
1066 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower an ISD::CALL node for the SPU: split the actual arguments between
// the argument registers and stack slots, emit the CALLSEQ_START/END pair,
// build the target call node, and copy return values out of R3/R4.
1071 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1072 SDOperand Chain = Op.getOperand(0);
// Operand layout of ISD::CALL: 0=chain, 1=CC, 2=isVarArg, 3=isTailCall,
// 4=callee, then (value, signness) pairs for each argument.
1074 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
// NOTE(review): isTailCall is extracted but no tail-call path is visible in
// this function — presumably tail calls are not implemented; confirm.
1075 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1077 SDOperand Callee = Op.getOperand(4);
1078 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1079 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1080 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1081 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1083 // Handy pointer type
1084 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1086 // Accumulate how many bytes are to be pushed on the stack, including the
1087 // linkage area, and parameter passing area. According to the SPU ABI,
1088 // we minimally need space for [LR] and [SP]
1089 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1091 // Set up a copy of the stack pointer for use loading and storing any
1092 // arguments that may not fit in the registers available for argument
1094 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1096 // Figure out which arguments are going to go in registers, and which in
1098 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1099 unsigned ArgRegIdx = 0;
1101 // Keep track of registers passing arguments
1102 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1103 // And the arguments passed on the stack
1104 SmallVector<SDOperand, 8> MemOpChains;
1106 for (unsigned i = 0; i != NumOps; ++i) {
1107 SDOperand Arg = Op.getOperand(5+2*i);
1109 // PtrOff will be used to store the current argument to the stack if a
1110 // register cannot be found for it.
1111 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1112 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// Each value type group below (case labels elided in this listing) follows
// the same pattern: use the next argument register if one is left, else
// store to the current stack slot and advance the offset.
1114 switch (Arg.getValueType()) {
1115 default: assert(0 && "Unexpected ValueType for argument!");
1119 if (ArgRegIdx != NumArgRegs) {
1120 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1122 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1123 ArgOffset += StackSlotSize;
1128 if (ArgRegIdx != NumArgRegs) {
1129 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1131 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1132 ArgOffset += StackSlotSize;
1139 if (ArgRegIdx != NumArgRegs) {
1140 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1142 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1143 ArgOffset += StackSlotSize;
1149 // Update number of stack bytes actually used, insert a call sequence start
1150 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1151 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1153 if (!MemOpChains.empty()) {
1154 // Adjust the stack pointer for the stack arguments.
1155 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1156 &MemOpChains[0], MemOpChains.size());
1159 // Build a sequence of copy-to-reg nodes chained together with token chain
1160 // and flag operands which copy the outgoing args into the appropriate regs.
1162 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1163 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
// The glue (flag) value keeps these copies ordered immediately before the
// call during scheduling.
1165 InFlag = Chain.getValue(1);
1168 std::vector<MVT::ValueType> NodeTys;
1169 NodeTys.push_back(MVT::Other); // Returns a chain
1170 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1172 SmallVector<SDOperand, 8> Ops;
1173 unsigned CallOpc = SPUISD::CALL;
1175 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1176 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1177 // node so that legalize doesn't hack it.
1178 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1179 GlobalValue *GV = G->getGlobal();
1180 unsigned CalleeVT = Callee.getValueType();
1182 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1183 // style calls, otherwise, external symbols are BRASL calls.
1185 // This may be an unsafe assumption for JIT and really large compilation
1187 if (GV->isDeclaration()) {
// Declaration (no body): absolute-address call.
1188 Callee = DAG.getGlobalAddress(GV, CalleeVT);
// Definition: PC-relative call through the PCRelAddr wrapper.
1190 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1191 DAG.getTargetGlobalAddress(GV, CalleeVT),
1192 DAG.getConstant(0, PtrVT));
1194 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1195 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1196 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1197 // If this is an absolute destination address that appears to be a legal
1198 // local store address, use the munged value.
1199 Callee = SDOperand(Dest, 0);
1201 Ops.push_back(Chain);
1202 Ops.push_back(Callee);
1204 // Add argument registers to the end of the list so that they are known live
1206 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1207 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1208 RegsToPass[i].second.getValueType()));
1211 Ops.push_back(InFlag);
1212 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1213 InFlag = Chain.getValue(1);
1215 SDOperand ResultVals[3];
1216 unsigned NumResults = 0;
1219 // If the call has results, copy the values out of the ret val registers.
// Case labels for the result types are elided in this listing; R3 holds the
// primary return value, with R4 used for the second half of expanded pairs.
1220 switch (Op.Val->getValueType(0)) {
1221 default: assert(0 && "Unexpected ret value!");
1222 case MVT::Other: break;
1224 if (Op.Val->getValueType(1) == MVT::i32) {
1225 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1226 ResultVals[0] = Chain.getValue(0);
1227 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1228 Chain.getValue(2)).getValue(1);
1229 ResultVals[1] = Chain.getValue(0);
1231 NodeTys.push_back(MVT::i32);
1233 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1234 ResultVals[0] = Chain.getValue(0);
1237 NodeTys.push_back(MVT::i32);
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1243 NodeTys.push_back(MVT::i64);
1247 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1248 InFlag).getValue(1);
1249 ResultVals[0] = Chain.getValue(0);
1251 NodeTys.push_back(Op.Val->getValueType(0));
1258 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1259 InFlag).getValue(1);
1260 ResultVals[0] = Chain.getValue(0);
1262 NodeTys.push_back(Op.Val->getValueType(0));
1266 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1267 DAG.getConstant(NumStackBytes, PtrVT));
1268 NodeTys.push_back(MVT::Other);
1270 // If the function returns void, just return the chain.
1271 if (NumResults == 0)
1274 // Otherwise, merge everything together with a MERGE_VALUES node.
1275 ResultVals[NumResults++] = Chain;
1276 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1277 ResultVals, NumResults);
1278 return Res.getValue(Op.ResNo);
// Lower an ISD::RET node: let the RetCC_SPU calling-convention table assign
// each return value to a physical register, copy the values there, and emit
// an SPUISD::RET_FLAG node (glued to the last copy when one exists).
1282 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1283 SmallVector<CCValAssign, 16> RVLocs;
1284 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1285 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1286 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1287 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1289 // If this is the first return lowered for this function, add the regs to the
1290 // liveout set for the function.
1291 if (DAG.getMachineFunction().liveout_empty()) {
1292 for (unsigned i = 0; i != RVLocs.size(); ++i)
1293 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1296 SDOperand Chain = Op.getOperand(0);
1299 // Copy the result values into the output registers.
1300 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1301 CCValAssign &VA = RVLocs[i];
1302 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands come in (value, signness) pairs after the chain, hence i*2+1.
1303 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1304 Flag = Chain.getValue(1);
// When at least one value was copied, Flag is valid and must glue the
// copies to the return; otherwise emit the flagless form.
1308 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1310 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1314 //===----------------------------------------------------------------------===//
1315 // Vector related lowering:
1316 //===----------------------------------------------------------------------===//
// getVecImm - If every non-undef element of this BUILD_VECTOR is the same
// constant, return that ConstantSDNode; otherwise return 0. An all-undef
// vector also returns 0 (an implicit def serves better than a constant).
1318 static ConstantSDNode *
1319 getVecImm(SDNode *N) {
1320 SDOperand OpVal(0, 0);
1322 // Check to see if this buildvec has a single non-undef value in its elements.
1323 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1324 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// Remember the first non-undef element; any later mismatch means no splat.
1326 OpVal = N->getOperand(i);
1327 else if (OpVal != N->getOperand(i))
1331 if (OpVal.Val != 0) {
1332 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1337 return 0; // All UNDEF: use implicit def.; not Constant node
1340 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1341 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// constant; otherwise an empty SDOperand is returned (elided lines below).
1343 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1344 MVT::ValueType ValueType) {
1345 if (ConstantSDNode *CN = getVecImm(N)) {
1346 uint64_t Value = CN->getValue();
// 0x3ffff is the maximum unsigned 18-bit value.
1347 if (Value <= 0x3ffff)
1348 return DAG.getConstant(Value, ValueType);
1354 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1355 /// and the value fits into a signed 16-bit constant, and if so, return the
// constant; otherwise an empty SDOperand is returned (elided lines below).
// Each arm performs the same check for its element width: the value must
// survive a round trip through sign-extension from 16 bits.
1357 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1358 MVT::ValueType ValueType) {
1359 if (ConstantSDNode *CN = getVecImm(N)) {
1360 if (ValueType == MVT::i32) {
1361 int Value = (int) CN->getValue();
// Sign-extend the low 16 bits and compare against the original.
1362 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1364 if (Value == SExtValue)
1365 return DAG.getConstant(Value, ValueType);
1366 } else if (ValueType == MVT::i16) {
1367 short Value = (short) CN->getValue();
1368 int SExtValue = ((int) Value << 16) >> 16;
1370 if (Value == (short) SExtValue)
1371 return DAG.getConstant(Value, ValueType);
1372 } else if (ValueType == MVT::i64) {
1373 int64_t Value = CN->getValue();
// 64-bit variant of the same sign-extension round trip.
1374 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1376 if (Value == SExtValue)
1377 return DAG.getConstant(Value, ValueType);
1384 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1385 /// and the value fits into a signed 10-bit constant, and if so, return the
// constant; otherwise an empty SDOperand is returned (elided lines below).
1387 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1388 MVT::ValueType ValueType) {
1389 if (ConstantSDNode *CN = getVecImm(N)) {
1390 int Value = (int) CN->getValue();
// For i16 elements, test the value truncated to 16 bits; isS10Constant
// checks the signed 10-bit range.
1391 if ((ValueType == MVT::i32 && isS10Constant(Value))
1392 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1393 return DAG.getConstant(Value, ValueType);
1399 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1400 /// and the value fits into a signed 8-bit constant, and if so, return the
1403 /// @note: The incoming vector is v16i8 because that's the only way we can load
1404 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1406 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1407 MVT::ValueType ValueType) {
1408 if (ConstantSDNode *CN = getVecImm(N)) {
1409 int Value = (int) CN->getValue();
1410 if (ValueType == MVT::i16
1411 && Value <= 0xffff /* truncated from uint64_t */
1412 && ((short) Value >> 8) == ((short) Value & 0xff))
1413 return DAG.getConstant(Value & 0xff, ValueType);
1414 else if (ValueType == MVT::i8
1415 && (Value & 0xff) == Value)
1416 return DAG.getConstant(Value, ValueType);
1422 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1423 /// and the value fits into a signed 16-bit constant, and if so, return the
// value shifted into the low 16 bits (i.e., the ILHU-style immediate);
// otherwise an empty SDOperand is returned (elided lines below).
1425 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1426 MVT::ValueType ValueType) {
1427 if (ConstantSDNode *CN = getVecImm(N)) {
1428 uint64_t Value = CN->getValue();
// The low 16 bits must be zero and only bits 16..31 may be set.
// NOTE(review): the i64 arm masks with 32-bit 0xffff0000, so it only accepts
// a pattern confined to bits 16..31 of the full 64-bit value — confirm this
// matches the intended v2i64 ILHU splat semantics.
1429 if ((ValueType == MVT::i32
1430 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1431 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1432 return DAG.getConstant(Value >> 16, ValueType);
1438 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splatted element as an i32 constant, or an empty SDOperand
// (elided lines below) when the vector is not a constant splat.
1439 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1440 if (ConstantSDNode *CN = getVecImm(N)) {
1441 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1447 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1448 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1449 if (ConstantSDNode *CN = getVecImm(N)) {
1450 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1456 // If this is a vector of constants or undefs, get the bits. A bit in
1457 // UndefBits is set if the corresponding element of the vector is an
1458 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1459 // zero. Return true if this is not an array of constants, false if it is.
// The 128-bit vector is flattened into two uint64_t words; each element is
// masked to its width and OR'd into its slot.
1461 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1462 uint64_t UndefBits[2]) {
1463 // Start with zero'd results.
1464 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1466 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1467 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1468 SDOperand OpVal = BV->getOperand(i);
1470 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1471 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1473 uint64_t EltBits = 0;
1474 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for undef elements; their VectorBits stay zero.
1475 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1476 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1478 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1479 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1480 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements are reinterpreted as their raw bit pattern.
1481 const APFloat &apf = CN->getValueAPF();
1482 EltBits = (CN->getValueType(0) == MVT::f32
1483 ? FloatToBits(apf.convertToFloat())
1484 : DoubleToBits(apf.convertToDouble()));
1486 // Nonconstant element.
1490 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1493 //printf("%llx %llx %llx %llx\n",
1494 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1498 /// If this is a splat (repetition) of a value across the whole vector, return
1499 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1500 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1501 /// SplatSize = 1 byte.
// Undef bits never block a match: at each halving step the two halves are
// compared with the other half's undef mask excluded.
1502 static bool isConstantSplat(const uint64_t Bits128[2],
1503 const uint64_t Undef128[2],
1505 uint64_t &SplatBits, uint64_t &SplatUndef,
1507 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1508 // the same as the lower 64-bits, ignoring undefs.
// Pre-fold each width down so the halving comparisons below are cheap.
1509 uint64_t Bits64 = Bits128[0] | Bits128[1];
1510 uint64_t Undef64 = Undef128[0] & Undef128[1];
1511 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1512 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1513 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1514 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1516 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
// MinSplatBits (elided parameter line) stops the halving at the element
// width, so a splat is never reported smaller than one element.
1517 if (MinSplatBits < 64) {
1519 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1521 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1522 if (MinSplatBits < 32) {
1524 // If the top 16-bits are different than the lower 16-bits, ignoring
1525 // undefs, we have an i32 splat.
1526 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1527 if (MinSplatBits < 16) {
1528 // If the top 8-bits are different than the lower 8-bits, ignoring
1529 // undefs, we have an i16 splat.
1530 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1531 // Otherwise, we have an 8-bit splat.
1532 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1533 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// (SplatSize assignments and the intermediate fall-through returns are
// elided in this listing.)
1539 SplatUndef = Undef16;
1546 SplatUndef = Undef32;
1552 SplatBits = Bits128[0];
1553 SplatUndef = Undef128[0];
1559 return false; // Can't be a splat if two pieces don't match.
1562 // If this is a case we can't handle, return null and let the default
1563 // expansion code take care of it. If we CAN select this case, and if it
1564 // selects to a single instruction, return Op. Otherwise, if we can codegen
1565 // this case more efficiently than a constant pool load, lower it to the
1566 // sequence of ops that should be used.
1567 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1568 MVT::ValueType VT = Op.getValueType();
1569 // If this is a vector of constants or undefs, get the bits. A bit in
1570 // UndefBits is set if the corresponding element of the vector is an
1571 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1573 uint64_t VectorBits[2];
1574 uint64_t UndefBits[2];
1575 uint64_t SplatBits, SplatUndef;
1577 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1578 || !isConstantSplat(VectorBits, UndefBits,
1579 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1580 SplatBits, SplatUndef, SplatSize))
1581 return SDOperand(); // Not a constant vector, not a splat.
// The per-VT case labels are elided in this listing; each arm rebuilds the
// splat in the cheapest integer form the SPU can materialize.
// v4f32: splat the raw 32-bit pattern as v4i32, then bitcast back.
1586 uint32_t Value32 = SplatBits;
1587 assert(SplatSize == 4
1588 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1589 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1590 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1591 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1592 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same trick with the 64-bit pattern via v2i64.
1596 uint64_t f64val = SplatBits;
1597 assert(SplatSize == 8
1598 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1599 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1600 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1601 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1602 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 splat:
1606 // 8-bit constants have to be expanded to 16-bits
1607 unsigned short Value16 = SplatBits | (SplatBits << 8);
1609 for (int i = 0; i < 8; ++i)
1610 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1611 return DAG.getNode(ISD::BIT_CONVERT, VT,
1612 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 splat: widen a 1-byte splat to 16 bits if necessary.
1615 unsigned short Value16;
1617 Value16 = (unsigned short) (SplatBits & 0xffff);
1619 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1620 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1622 for (int i = 0; i < 8; ++i) Ops[i] = T;
1623 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 splat:
1626 unsigned int Value = SplatBits;
1627 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1628 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 splat: split into 32-bit halves; "special" halves (0, ~0, sign bit)
// can be synthesized by the shuffle mask itself instead of a vector load.
1631 uint64_t val = SplatBits;
1632 uint32_t upper = uint32_t(val >> 32);
1633 uint32_t lower = uint32_t(val);
1638 SmallVector<SDOperand, 16> ShufBytes;
1640 bool upper_special, lower_special;
1642 // NOTE: This code creates common-case shuffle masks that can be easily
1643 // detected as common expressions. It is not attempting to create highly
1644 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1646 // Detect if the upper or lower half is a special shuffle mask pattern:
1647 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1648 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1650 // Create lower vector if not a special pattern
1651 if (!lower_special) {
1652 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1653 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1654 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1655 LO32C, LO32C, LO32C, LO32C));
1658 // Create upper vector if not a special pattern
1659 if (!upper_special) {
1660 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1661 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1662 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1663 HI32C, HI32C, HI32C, HI32C));
1666 // If either upper or lower are special, then the two input operands are
1667 // the same (basically, one of them is a "don't care")
1672 if (lower_special && upper_special) {
1673 // Unhappy situation... both upper and lower are special, so punt with
1674 // a target constant:
1675 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1676 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shufb control word: even words take the upper half,
// odd words take the lower half; special values use the magic selector
// bytes (0x80 = 0x00, 0xc0 = 0xff, 0xe0 = 0x80 — per shufb semantics).
1680 for (int i = 0; i < 4; ++i) {
1681 for (int j = 0; j < 4; ++j) {
1683 bool process_upper, process_lower;
1686 process_upper = (upper_special && (i & 1) == 0);
1687 process_lower = (lower_special && (i & 1) == 1);
1689 if (process_upper || process_lower) {
1690 if ((process_upper && upper == 0)
1691 || (process_lower && lower == 0))
1693 else if ((process_upper && upper == 0xffffffff)
1694 || (process_lower && lower == 0xffffffff))
1696 else if ((process_upper && upper == 0x80000000)
1697 || (process_lower && lower == 0x80000000))
1698 val = (j == 0 ? 0xe0 : 0x80);
1700 val = i * 4 + j + ((i & 1) * 16);
1702 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1706 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1707 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1708 &ShufBytes[0], ShufBytes.size()));
1710 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1711 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1712 return DAG.getNode(ISD::BIT_CONVERT, VT,
1713 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1714 Zero, Zero, Zero, Zero));
1722 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1723 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1724 /// permutation vector, V3, is monotonically increasing with one "exception"
1725 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1726 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1727 /// In either case, the net result is going to eventually invoke SHUFB to
1728 /// permute/shuffle the bytes from V1 and V2.
1730 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1731 /// control word for byte/halfword/word insertion. This takes care of a single
1732 /// element move from V2 into V1.
1734 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1735 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1736 SDOperand V1 = Op.getOperand(0);
1737 SDOperand V2 = Op.getOperand(1);
1738 SDOperand PermMask = Op.getOperand(2);
1740 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1742 // If we have a single element being moved from V1 to V2, this can be handled
1743 // using the C*[DX] compute mask instructions, but the vector elements have
1744 // to be monotonically increasing with one exception element.
1745 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1746 unsigned EltsFromV2 = 0;
// V2EltIdx0 = index of V2's first element within the combined (V1,V2)
// index space; set per element width below (elided assignments).
1748 unsigned V2EltIdx0 = 0;
1749 unsigned CurrElt = 0;
1750 bool monotonic = true;
1751 if (EltVT == MVT::i8)
1753 else if (EltVT == MVT::i16)
1755 else if (EltVT == MVT::i32)
1758 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: bail out of the "monotonic" fast path as soon as more than
// one element is sourced from V2 or the V1 indices break sequence.
1760 for (unsigned i = 0, e = PermMask.getNumOperands();
1761 EltsFromV2 <= 1 && monotonic && i != e;
1764 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1767 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1769 if (SrcElt >= V2EltIdx0) {
// NOTE(review): under this branch SrcElt >= V2EltIdx0, so
// (V2EltIdx0 - SrcElt) is <= 0; the byte offset looks like it should be
// (SrcElt - V2EltIdx0) << 2 — verify against the full file.
1771 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1772 } else if (CurrElt != SrcElt) {
1779 if (EltsFromV2 == 1 && monotonic) {
1780 // Compute mask and shuffle
1781 MachineFunction &MF = DAG.getMachineFunction();
1782 SSARegMap *RegMap = MF.getSSARegMap();
1783 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1784 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1785 // Initialize temporary register to 0
1786 SDOperand InitTempReg =
1787 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1788 // Copy register's contents as index in INSERT_MASK:
1789 SDOperand ShufMaskOp =
1790 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1791 DAG.getTargetConstant(V2Elt, MVT::i32),
1792 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1793 // Use shuffle mask in SHUFB synthetic instruction:
1794 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1796 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1797 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1799 SmallVector<SDOperand, 16> ResultMask;
1800 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1802 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1805 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1807 for (unsigned j = 0; j != BytesPerElement; ++j) {
1808 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1813 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1814 &ResultMask[0], ResultMask.size());
1815 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes an explicit
// constant BUILD_VECTOR (which later folds into a vector load); anything
// else is broadcast with the PROMOTE_SCALAR synthetic node.
1819 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1820 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1822 if (Op0.Val->getOpcode() == ISD::Constant) {
1823 // For a constant, build the appropriate constant vector, which will
1824 // eventually simplify to a vector register load.
1826 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1827 SmallVector<SDOperand, 16> ConstVecValues;
1831 // Create a constant vector:
// Derive element count and element type from the result vector type.
1832 switch (Op.getValueType()) {
1833 default: assert(0 && "Unexpected constant value type in "
1834 "LowerSCALAR_TO_VECTOR");
1835 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1836 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1837 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1838 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1839 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1840 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1843 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1844 for (size_t j = 0; j < n_copies; ++j)
1845 ConstVecValues.push_back(CValue);
1847 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1848 &ConstVecValues[0], ConstVecValues.size());
1850 // Otherwise, copy the value from one register to another:
1851 switch (Op0.getValueType()) {
1852 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// (Scalar-type case labels elided.) Both operands are the same scalar;
// PROMOTE_SCALAR splats it across the vector register.
1859 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1866 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1867 switch (Op.getValueType()) {
1869 SDOperand rA = Op.getOperand(0);
1870 SDOperand rB = Op.getOperand(1);
1871 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1872 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1873 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1874 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1876 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1880 // Multiply two v8i16 vectors (pipeline friendly version):
1881 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1882 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1883 // c) Use SELB to select upper and lower halves from the intermediate results
1885 // NOTE: We really want to move the FSMBI to earlier to actually get the
1886 // dual-issue. This code does manage to do this, even if it's a little on
1889 MachineFunction &MF = DAG.getMachineFunction();
1890 SSARegMap *RegMap = MF.getSSARegMap();
1891 SDOperand Chain = Op.getOperand(0);
1892 SDOperand rA = Op.getOperand(0);
1893 SDOperand rB = Op.getOperand(1);
1894 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1895 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1898 DAG.getCopyToReg(Chain, FSMBIreg,
1899 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1900 DAG.getConstant(0xcccc, MVT::i32)));
1903 DAG.getCopyToReg(FSMBOp, HiProdReg,
1904 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1906 SDOperand HHProd_v4i32 =
1907 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1908 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1910 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1911 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1912 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1913 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1915 DAG.getConstant(16, MVT::i16))),
1916 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1919 // This M00sE is N@stI! (apologies to Monty Python)
1921 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1922 // is to break it all apart, sign extend, and reassemble the various
1923 // intermediate products.
1925 MachineFunction &MF = DAG.getMachineFunction();
1926 SSARegMap *RegMap = MF.getSSARegMap();
1927 SDOperand Chain = Op.getOperand(0);
1928 SDOperand rA = Op.getOperand(0);
1929 SDOperand rB = Op.getOperand(1);
1930 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1931 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1933 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1934 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1935 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1938 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1939 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1940 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1942 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1944 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1947 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1948 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1950 SDOperand FSMBdef_2222 =
1951 DAG.getCopyToReg(Chain, FSMBreg_2222,
1952 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1953 DAG.getConstant(0x2222, MVT::i32)));
1955 SDOperand FSMBuse_2222 =
1956 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1958 SDOperand LoProd_1 =
1959 DAG.getCopyToReg(Chain, LoProd_reg,
1960 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1963 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1966 DAG.getNode(ISD::AND, MVT::v4i32,
1967 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1968 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1969 LoProdMask, LoProdMask,
1970 LoProdMask, LoProdMask));
1973 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1974 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1977 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1978 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1981 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1982 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1985 SDOperand HHProd_1 =
1986 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1987 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1988 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1990 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1993 DAG.getCopyToReg(Chain, HiProd_reg,
1994 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1996 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2000 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2001 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2003 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2004 DAG.getNode(ISD::OR, MVT::v4i32,
2009 cerr << "CellSPU: Unknown vector multiplication, got "
2010 << MVT::getValueTypeString(Op.getValueType())
//! Custom lowering for f32 (and vector float) FDIV.
/*!
  SPU has no hardware divide; A/B is computed as A * (1/B), where 1/B is
  produced by a reciprocal estimate (FPRecipEst) refined by a floating
  interpolate (FPInterp), followed by one correction step:
     result = A*Brcpl + Brcpl*(A - B*(A*Brcpl))
 */
2019 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2020 MachineFunction &MF = DAG.getMachineFunction();
2021 SSARegMap *RegMap = MF.getSSARegMap();
2023 SDOperand A = Op.getOperand(0);
2024 SDOperand B = Op.getOperand(1);
2025 unsigned VT = Op.getValueType();
2027 unsigned VRegBR, VRegC;
// Intermediate results live in scalar FP registers for f32, in vector
// registers otherwise (presumably the v4f32 case -- TODO confirm against
// the branch condition not visible here).
2029 if (VT == MVT::f32) {
2030 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2031 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2033 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2034 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2036 // TODO: make sure we're feeding FPInterp the right arguments
2037 // Right now: fi B, frest(B)
2040 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl: refined reciprocal estimate of B, copied into VRegBR so it can be
// read back multiple times below.
2042 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2043 DAG.getNode(SPUISD::FPInterp, VT, B,
2044 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2046 // Computes A * BRcpl and stores in a temporary register
2048 DAG.getCopyToReg(BRcpl, VRegC,
2049 DAG.getNode(ISD::FMUL, VT, A,
2050 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2051 // What's the Chain variable do? It's magic!
2052 // TODO: set Chain = Op(0).getEntryNode()
// One Newton-Raphson-style correction of the estimated quotient:
//   A*Brcpl + Brcpl*(A - B*(A*Brcpl))
2054 return DAG.getNode(ISD::FADD, VT,
2055 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2056 DAG.getNode(ISD::FMUL, VT,
2057 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2058 DAG.getNode(ISD::FSUB, VT, A,
2059 DAG.getNode(ISD::FMUL, VT, B,
2060 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2063 // Expands double-precision FDIV
2064 // Expects two doubles as inputs X and Y, does a floating point
2065 // reciprocal estimate, and three iterations of Newton-Raphson
2066 // to increase accuracy.
2067 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2068 // MachineFunction &MF = DAG.getMachineFunction();
2069 // SSARegMap *RegMap = MF.getSSARegMap();
2071 // SDOperand X = Op.getOperand(0);
2072 // SDOperand Y = Op.getOperand(1);
//! Custom lowering for ISD::EXTRACT_VECTOR_ELT.
/*!
  Extracts element \a Elt (which must be a constant) of vector \a N by
  building a SHUFB mask that rotates the requested element into the value
  type's preferred slot, then reading it out with SPUISD::EXTRACT_ELT0.
  For i32/i64 element 0, the element is already in the preferred slot and
  no shuffle is needed.
 */
2075 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2076 unsigned VT = Op.getValueType();
2077 SDOperand N = Op.getOperand(0);
2078 SDOperand Elt = Op.getOperand(1);
2079 SDOperand ShufMask[16];
2080 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2082 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2084 int EltNo = (int) C->getValue();
// Range-check the element index against the lane count for each type.
// (Messages state the maximum valid slot; e.g. i32 has slots 0-3.)
2087 if (VT == MVT::i8 && EltNo >= 16)
2088 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2089 else if (VT == MVT::i16 && EltNo >= 8)
2090 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2091 else if (VT == MVT::i32 && EltNo >= 4)
2092 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2093 else if (VT == MVT::i64 && EltNo >= 2)
2094 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2096 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2097 // i32 and i64: Element 0 is the preferred slot
2098 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2101 // Need to generate shuffle mask and extract:
2102 int prefslot_begin, prefslot_end;
2103 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte range per element width (byte offsets in the qword).
2107 prefslot_begin = prefslot_end = 3;
2111 prefslot_begin = 2; prefslot_end = 3;
2115 prefslot_begin = 0; prefslot_end = 3;
2119 prefslot_begin = 0; prefslot_end = 7;
// Build the 16-byte SHUFB mask: bytes inside the preferred slot select the
// requested element's bytes; the rest replicate a don't-care pattern.
2124 for (int i = 0; i < 16; ++i) {
2125 // zero fill upper part of preferred slot, don't care about the
2127 unsigned int mask_val;
2129 if (i <= prefslot_end) {
2131 ((i < prefslot_begin)
2133 : elt_byte + (i - prefslot_begin));
2135 ShufMask[i] = DAG.getConstant(mask_val, MVT::i16);
2137 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2140 SDOperand ShufMaskVec =
2141 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2143 sizeof(ShufMask) / sizeof(ShufMask[0]));
2145 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2146 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2147 N, N, ShufMaskVec));
//! Custom lowering for ISD::INSERT_VECTOR_ELT.
/*!
  Inserts scalar \a ValOp into vector \a VecOp at constant index \a IdxOp
  by generating an SPUISD::INSERT_MASK from an aligned address offset by
  the element index, then SHUFBing the scalar into place.
 */
2151 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2152 SDOperand VecOp = Op.getOperand(0);
2153 SDOperand ValOp = Op.getOperand(1);
2154 SDOperand IdxOp = Op.getOperand(2);
2155 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> asserts internally and never returns null, so the
// assert below can never fire; dyn_cast<> would be needed for a real check.
2157 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2158 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2160 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2161 // Use $2 because it's always 16-byte aligned and it's available:
2162 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// Shuffle ValOp (promoted to a vector) into VecOp under the insert mask.
2165 DAG.getNode(SPUISD::SHUFB, VT,
2166 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2168 DAG.getNode(SPUISD::INSERT_MASK, VT,
2169 DAG.getNode(ISD::ADD, PtrVT,
2171 DAG.getConstant(CN->getValue(),
//! Lower i8 arithmetic by promotion to i16.
/*!
  SPU has no native 8-bit arithmetic, so each i8 operation is lowered by
  widening the operands to i16 (sign- or zero-extended as appropriate for
  \a Opc), performing the operation at i16, and truncating back to i8.
 */
2177 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2178 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2180 assert(Op.getValueType() == MVT::i8);
2183 assert(0 && "Unhandled i8 math operator");
2187 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Sign-extend both operands to i16 (constants are rebuilt at i16 directly),
// apply Opc, and truncate the result back to i8.
2189 SDOperand N1 = Op.getOperand(1);
2190 N0 = (N0.getOpcode() != ISD::Constant
2191 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2192 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2193 N1 = (N1.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2195 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2196 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2197 DAG.getNode(Opc, MVT::i16, N0, N1));
// Zero-extend N0 and replicate its low byte into the high byte (the OR of
// N0 with N0<<8), so rotates pull in copies of the operand's own bits.
2201 SDOperand N1 = Op.getOperand(1);
2203 N0 = (N0.getOpcode() != ISD::Constant
2204 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2205 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2206 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2207 N1 = (N1.getOpcode() != ISD::Constant
2208 ? DAG.getNode(N1Opc, MVT::i16, N1)
2209 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2210 SDOperand ExpandArg =
2211 DAG.getNode(ISD::OR, MVT::i16, N0,
2212 DAG.getNode(ISD::SHL, MVT::i16,
2213 N0, DAG.getConstant(8, MVT::i16)));
2214 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2215 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Zero-extend both operands (shift amount widened or narrowed to i16 as
// needed), apply Opc at i16, truncate back.
2219 SDOperand N1 = Op.getOperand(1);
2221 N0 = (N0.getOpcode() != ISD::Constant
2222 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2223 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2224 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2225 N1 = (N1.getOpcode() != ISD::Constant
2226 ? DAG.getNode(N1Opc, MVT::i16, N1)
2227 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2228 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2229 DAG.getNode(Opc, MVT::i16, N0, N1));
// Sign-extend variant of the same promote/operate/truncate pattern, for
// operations whose semantics depend on the sign bit.
2232 SDOperand N1 = Op.getOperand(1);
2234 N0 = (N0.getOpcode() != ISD::Constant
2235 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2236 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2237 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2238 N1 = (N1.getOpcode() != ISD::Constant
2239 ? DAG.getNode(N1Opc, MVT::i16, N1)
2240 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2241 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2242 DAG.getNode(Opc, MVT::i16, N0, N1));
// Another signed promote/operate/truncate case.
2245 SDOperand N1 = Op.getOperand(1);
2247 N0 = (N0.getOpcode() != ISD::Constant
2248 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2249 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2250 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2251 N1 = (N1.getOpcode() != ISD::Constant
2252 ? DAG.getNode(N1Opc, MVT::i16, N1)
2253 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2254 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2255 DAG.getNode(Opc, MVT::i16, N0, N1));
2263 //! Lower byte immediate operations for v16i8 vectors:
/*!
  Recognizes (op Arg, splat-constant) in either operand order (looking
  through BIT_CONVERTs) and rewrites the constant BUILD_VECTOR as sixteen
  identical i8 target constants, so the instruction selector can match the
  byte-immediate forms (ANDBI/ORBI/XORBI).
 */
2265 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2268 MVT::ValueType VT = Op.getValueType();
// Assume the constant is operand 0; if it isn't a BUILD_VECTOR (possibly
// behind a BIT_CONVERT), swap and try operand 1 instead.
2270 ConstVec = Op.getOperand(0);
2271 Arg = Op.getOperand(1);
2272 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2273 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2274 ConstVec = ConstVec.getOperand(0);
2276 ConstVec = Op.getOperand(1);
2277 Arg = Op.getOperand(0);
2278 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2279 ConstVec = ConstVec.getOperand(0);
2284 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2285 uint64_t VectorBits[2];
2286 uint64_t UndefBits[2];
2287 uint64_t SplatBits, SplatUndef;
// Only rewrite when the constant vector is a uniform splat; keep just the
// low byte of the splat value as the immediate.
2290 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2291 && isConstantSplat(VectorBits, UndefBits,
2292 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2293 SplatBits, SplatUndef, SplatSize)) {
2294 SDOperand tcVec[16];
2295 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2296 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2298 // Turn the BUILD_VECTOR into a set of target constants:
2299 for (size_t i = 0; i < tcVecSize; ++i)
2302 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2303 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2310 //! Lower i32 multiplication
/*!
  SPU multiplies only 16x16 -> 32 bits, so a full 32-bit multiply is
  composed from partial products:
    rA*rB = (mpyh(rA,rB) + mpyh(rB,rA)) + mpyu(rA,rB)
  i.e. both cross high*low terms plus the unsigned low*low term.
 */
2311 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// Diagnostic for value types this lowering does not handle.
2315 cerr << "CellSPU: Unknown LowerMUL value type, got "
2316 << MVT::getValueTypeString(Op.getValueType())
2322 SDOperand rA = Op.getOperand(0);
2323 SDOperand rB = Op.getOperand(1);
2325 return DAG.getNode(ISD::ADD, MVT::i32,
2326 DAG.getNode(ISD::ADD, MVT::i32,
2327 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2328 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA),
2329 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2336 //! Custom lowering for CTPOP (count population)
/*!
2338 Custom lowering code that counts the number ones in the input
2339 operand. SPU has such an instruction, but it counts the number of
2340 ones per byte, which then have to be accumulated.
 */
2342 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2343 unsigned VT = Op.getValueType();
// Vector type with the same element type as VT, filling a 128-bit register.
2344 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8: the per-byte count from CNTB is already the answer; promote the
// scalar into a vector, count, and extract element 0.
2348 SDOperand N = Op.getOperand(0);
2349 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2351 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2352 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2354 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: CNTB yields per-byte counts; add the high byte's count (SRL by 8)
// to the low byte's and mask off the rest.
2358 MachineFunction &MF = DAG.getMachineFunction();
2359 SSARegMap *RegMap = MF.getSSARegMap();
2361 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2363 SDOperand N = Op.getOperand(0);
2364 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
// NOTE(review): 0x0f masks the sum of the two byte counts to 4 bits, but
// popcount(0xffff) == 16 (0x10) would be truncated to 0 — looks suspect;
// confirm whether the mask should be 0x1f.
2365 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2366 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2368 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2369 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2371 // CNTB_result becomes the chain to which all of the virtual registers
2372 // CNTB_reg, SUM1_reg become associated:
2373 SDOperand CNTB_result =
2374 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2376 SDOperand CNTB_rescopy =
2377 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2379 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2381 return DAG.getNode(ISD::AND, MVT::i16,
2382 DAG.getNode(ISD::ADD, MVT::i16,
2383 DAG.getNode(ISD::SRL, MVT::i16,
// i32: two reduction rounds — fold halves at 16 bits, then at 8 bits, and
// mask the final sum to one byte (max popcount 32 fits in 0xff).
2390 MachineFunction &MF = DAG.getMachineFunction();
2391 SSARegMap *RegMap = MF.getSSARegMap();
2393 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2394 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2396 SDOperand N = Op.getOperand(0);
2397 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2398 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2399 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2400 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2402 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2403 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2405 // CNTB_result becomes the chain to which all of the virtual registers
2406 // CNTB_reg, SUM1_reg become associated:
2407 SDOperand CNTB_result =
2408 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2410 SDOperand CNTB_rescopy =
2411 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First round: add the upper 16 bits' byte counts into the lower 16.
2414 DAG.getNode(ISD::SRL, MVT::i32,
2415 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2418 DAG.getNode(ISD::ADD, MVT::i32,
2419 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2421 SDOperand Sum1_rescopy =
2422 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second round: fold the remaining two byte counts together.
2425 DAG.getNode(ISD::SRL, MVT::i32,
2426 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2429 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2430 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2432 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2442 /// LowerOperation - Provide custom lowering hooks for some operations.
/// Dispatches each custom-lowered opcode to its Lower* helper; opcodes with
/// no case fall through to the diagnostic below and return no replacement.
2445 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2447 switch (Op.getOpcode()) {
// Unhandled opcode: dump diagnostics so the missing lowering is obvious.
2449 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2450 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2451 cerr << "*Op.Val:\n";
// Memory and address-materialization nodes are subtarget-dependent.
2458 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2460 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2461 case ISD::ConstantPool:
2462 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2463 case ISD::GlobalAddress:
2464 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2465 case ISD::JumpTable:
2466 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2468 return LowerConstant(Op, DAG);
2469 case ISD::ConstantFP:
2470 return LowerConstantFP(Op, DAG);
2471 case ISD::FORMAL_ARGUMENTS:
2472 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2474 return LowerCALL(Op, DAG);
2476 return LowerRET(Op, DAG, getTargetMachine());
// i8 operations are promoted to i16 (see LowerI8Math).
2485 return LowerI8Math(Op, DAG, Op.getOpcode());
2487 // Vector-related lowering.
2488 case ISD::BUILD_VECTOR:
2489 return LowerBUILD_VECTOR(Op, DAG);
2490 case ISD::SCALAR_TO_VECTOR:
2491 return LowerSCALAR_TO_VECTOR(Op, DAG);
2492 case ISD::VECTOR_SHUFFLE:
2493 return LowerVECTOR_SHUFFLE(Op, DAG);
2494 case ISD::EXTRACT_VECTOR_ELT:
2495 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2496 case ISD::INSERT_VECTOR_ELT:
2497 return LowerINSERT_VECTOR_ELT(Op, DAG);
2499 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2503 return LowerByteImmed(Op, DAG);
2505 // Vector and i8 multiply:
2507 if (MVT::isVector(Op.getValueType()))
2508 return LowerVectorMUL(Op, DAG);
2509 else if (Op.getValueType() == MVT::i8)
2510 return LowerI8Math(Op, DAG, Op.getOpcode());
2512 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only f32/v4f32 are supported; f64 lowering is not yet implemented.
2515 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2516 return LowerFDIVf32(Op, DAG);
2517 // else if (Op.getValueType() == MVT::f64)
2518 // return LowerFDIVf64(Op, DAG);
2520 assert(0 && "Calling FDIV on unsupported MVT");
2523 return LowerCTPOP(Op, DAG);
2529 //===----------------------------------------------------------------------===//
2530 // Other Lowering Code
2531 //===----------------------------------------------------------------------===//
// Target hook invoked for machine instructions that require custom MBB
// insertion. Body not visible here -- presumably either handles specific
// pseudo-instructions or asserts; confirm against the full source.
2534 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2535 MachineBasicBlock *BB)
2540 //===----------------------------------------------------------------------===//
2541 // Target Optimization Hooks
2542 //===----------------------------------------------------------------------===//
/// DAG-combine hook: simplifies target shift/rotate nodes with constant
/// zero operands (0 << V -> 0, V << 0 -> V).
2545 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2548 TargetMachine &TM = getTargetMachine();
2549 SelectionDAG &DAG = DCI.DAG;
2551 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2553 switch (N->getOpcode()) {
2556 // Look for obvious optimizations for shift left:
2557 // a) Replace 0 << V with 0
2558 // b) Replace V << 0 with V
2560 // N.B: llvm will generate an undef node if the shift amount is greater than
2561 // 15 (e.g.: V << 16), which will naturally trigger an assert.
// All of the immediate-form shift/rotate nodes share the same combine.
2564 case SPU::SHLQBIIvec:
2566 case SPU::ROTHIr16_i32:
2568 case SPU::ROTIr32_i16:
2569 case SPU::ROTQBYIvec:
2570 case SPU::ROTQBYBIvec:
2571 case SPU::ROTQBIIvec:
2572 case SPU::ROTHMIr16:
2574 case SPU::ROTQMBYIvec: {
// NOTE(review): cast<> never returns null, so these inner if-conditions are
// always true once the opcode check passes; they act only as scoped decls.
2575 if (N0.getOpcode() == ISD::Constant) {
2576 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2577 if (C->getValue() == 0) // 0 << V -> 0.
2581 SDOperand N1 = N->getOperand(1);
2582 if (N1.getOpcode() == ISD::Constant) {
2583 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2584 if (C->getValue() == 0) // V << 0 -> V
2595 //===----------------------------------------------------------------------===//
2596 // Inline Assembly Support
2597 //===----------------------------------------------------------------------===//
2599 /// getConstraintType - Given a constraint letter, return the type of
2600 /// constraint it is for this target.
/// Single-letter register constraints map to C_RegisterClass; anything else
/// defers to the TargetLowering default.
2601 SPUTargetLowering::ConstraintType
2602 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2603 if (ConstraintLetter.size() == 1) {
2604 switch (ConstraintLetter[0]) {
2611 return C_RegisterClass;
2614 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (plus the operand's value type)
// to a concrete SPU register class; unrecognized constraints defer to the
// TargetLowering default.
2617 std::pair<unsigned, const TargetRegisterClass*>
2618 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2619 MVT::ValueType VT) const
2621 if (Constraint.size() == 1) {
2622 // GCC RS6000 Constraint Letters
2623 switch (Constraint[0]) {
// 64-bit integer operands get R64C, everything else in this case gets R32C.
2627 return std::make_pair(0U, SPU::R64CRegisterClass);
2628 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point operands select the FP register class by width.
2631 return std::make_pair(0U, SPU::R32FPRegisterClass);
2632 else if (VT == MVT::f64)
2633 return std::make_pair(0U, SPU::R64FPRegisterClass);
2636 return std::make_pair(0U, SPU::GPRCRegisterClass);
2640 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Report known-zero/known-one bits for SPU-specific DAG nodes to the
// DAG combiner. Body not visible here -- presumably conservative (reports
// nothing known); confirm against the full source.
2644 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2646 uint64_t &KnownZero,
2648 const SelectionDAG &DAG,
2649 unsigned Depth ) const {
2654 // LowerAsmOperandForConstraint
// SPU currently adds no constraint-specific operand lowering; everything is
// delegated to the TargetLowering base-class implementation.
2656 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2657 char ConstraintLetter,
2658 std::vector<SDOperand> &Ops,
2659 SelectionDAG &DAG) {
2660 // Default, for the time being, to the base class handler
2661 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2664 /// isLegalAddressImmediate - Return true if the integer value can be used
2665 /// as the offset of the target addressing mode.
// NOTE(review): the bounds are asymmetric and both exclusive -- accepted
// range is [-(2^18)+1, (2^18)-2], so 2^18-1 is rejected; confirm whether
// the upper comparison was meant to be <= or < (1 << 18).
2666 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2667 // SPU's addresses are 256K:
2668 return (V > -(1 << 18) && V < (1 << 18) - 1);
2671 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {