1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by a team from the Computer Systems Research
6 // Department at The Aerospace Corporation and is distributed under the
7 // University of Illinois Open Source License. See LICENSE.TXT for details.
9 //===----------------------------------------------------------------------===//
11 // This file implements the SPUTargetLowering class.
13 //===----------------------------------------------------------------------===//
15 #include "SPURegisterNames.h"
16 #include "SPUISelLowering.h"
17 #include "SPUTargetMachine.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SSARegMap.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
// Maps SPUISD opcodes to human-readable node names; populated lazily on the
// first call to SPUTargetLowering::getTargetNodeName() below.
// NOTE(review): file-scope mutable map filled in without locking — assumes
// single-threaded use of getTargetNodeName(); confirm.
std::map<unsigned, const char *> node_names;
//! MVT::ValueType mapping to useful data for Cell SPU
struct valtype_map_s {
  // The value type this entry describes.
  const MVT::ValueType valtype;
  // Byte offset of the type's "preferred slot" within a 16-byte SPU register;
  // used by the custom load lowering to rotate the element into place.
  const int prefslot_byte;

// Per-type table consulted by getValueTypeMapEntry() below.
const valtype_map_s valtype_map[] = {

// Number of entries in valtype_map.
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
//! Look up the Cell SPU per-type data (preferred slot byte) for a value type.
// Returns the matching valtype_map entry, or null when VT has no entry; the
// cerr output below is the diagnostic emitted for the missing-entry case.
const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  const valtype_map_s *retval = 0;

  // Linear scan; the table is tiny, so O(n) lookup is fine.
  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;

  // Diagnostic for a value type missing from the table:
  cerr << "getValueTypeMapEntry returns NULL for "
       << MVT::getValueTypeString(VT)
82 //! Predicate that returns true if operand is a memory target
84 \arg Op Operand to test
85 \return true if the operand is a memory target (i.e., global
86 address, external symbol, constant pool) or an existing D-Form
89 bool isMemoryOperand(const SDOperand &Op)
91 const unsigned Opc = Op.getOpcode();
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::FrameIndex
95 || Opc == ISD::JumpTable
96 || Opc == ISD::ConstantPool
97 || Opc == ISD::ExternalSymbol
98 || Opc == ISD::TargetGlobalAddress
99 || Opc == ISD::TargetGlobalTLSAddress
100 || Opc == ISD::TargetFrameIndex
101 || Opc == ISD::TargetJumpTable
102 || Opc == ISD::TargetConstantPool
103 || Opc == ISD::TargetExternalSymbol
104 || Opc == SPUISD::DFormAddr);
/*!
 Construct the Cell SPU target-lowering object.

 Registers the SPU register classes for every supported scalar and vector
 type, then declares — via setOperationAction / setLoadXAction /
 setStoreXAction — how each DAG operation is handled per value type
 (Legal, Custom, Promote or Expand), and finally computes derived
 register properties.
 */
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions (comment previously said "PowerPC" —
  // a copy/paste leftover from the PPC backend).
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): these two calls repeat the FP_TO_SINT/SINT_TO_FP i64
  // settings made earlier in this constructor — harmless but redundant.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // NOTE(review): this overrides the earlier 'Legal' setting for
  // FP_TO_UINT/i32 — the last call wins.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);

  // v16i8 logical/multiply ops need custom handling (byte-granular ops):
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
/// getTargetNodeName - Return a human-readable name for a target-specific
/// SPUISD DAG node opcode, or null when the opcode is not in the table.
/// The opcode -> name table is populated lazily on the first call
/// (no locking; assumes single-threaded use).
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";

  // Unknown opcodes yield a null name.
  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
424 //===----------------------------------------------------------------------===//
425 // Calling convention code:
426 //===----------------------------------------------------------------------===//
428 #include "SPUGenCallingConv.inc"
430 //===----------------------------------------------------------------------===//
431 // LowerOperation implementation
432 //===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 Strategy: load the enclosing 16-byte chunk as v16i8, rotate the addressed
 element into the type's preferred slot, then extract element 0 (applying the
 sign/zero extension required for i1/i8 extending loads).
 */
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand basep = LN->getBasePtr();
  SDOperand the_chain = LN->getChain();
  // NOTE(review): BasepOpc holds a node *opcode*, but is declared as
  // MVT::ValueType — works because both are integral, but 'unsigned'
  // would state the intent better.
  MVT::ValueType BasepOpc = basep.Val->getOpcode();
  MVT::ValueType VT = LN->getLoadedVT();          // type read from memory
  MVT::ValueType OpVT = Op.Val->getValueType(0);  // type the node produces
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  if (BasepOpc == ISD::FrameIndex) {
    // Loading from a frame index is always properly aligned. Always.

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
    ExtType = ISD::ZEXTLOAD;

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand rot_op, rotamt;

    // The vector type we really want to be when we load the 16-byte chunk
    MVT::ValueType vecVT, opVecVT;

    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
    opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

    if (basep.getOpcode() == ISD::ADD) {
      // Pointer is (base + constant offset): split it so the offset can be
      // folded into a D-Form address and a byte-rotation amount.
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
             && "LowerLOAD: ISD::ADD operand 1 is not constant");
      c_offset = (int) CN->getValue();
      c_rotamt = (int) (c_offset & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      c_rotamt -= vtm->prefslot_byte;
      ptrp = basep.getOperand(0);
      c_rotamt = -vtm->prefslot_byte;

    if (alignment == 16) {
      // 16-byte aligned load into preferred slot, no rotation
      if (isMemoryOperand(ptrp))
        // Return modified D-Form address for pointer:
        ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                           ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
          return DAG.getLoad(VT, LN->getChain(), ptrp,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
          return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
                                LN->getSrcValueOffset(), OpVT,
                                LN->isVolatile(), 16);

    // Realign the base pointer, with a D-Form address
    if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
      basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                          ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));

    // Load the whole 16-byte chunk, then rotate the wanted element into the
    // preferred slot:
    rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);
    the_chain = rot_op.getValue(1);
    rotamt = DAG.getConstant(c_rotamt, MVT::i16);

    SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

    result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
    the_chain = result.getValue(1);

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      scalarvts = DAG.getVTList(VT, MVT::Other);
      scalarvts = DAG.getVTList(OpVT, MVT::Other);

    result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
    // Pull the preferred-slot element out, preserving the chain:
    result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
    the_chain = result.getValue(1);

    // Handle the sign and zero-extending loads for i1 and i8:

    if (ExtType == ISD::SEXTLOAD) {
      NewOpC = (OpVT == MVT::i1
                ? SPUISD::EXTRACT_I1_SEXT
                : SPUISD::EXTRACT_I8_SEXT);
      assert(ExtType == ISD::ZEXTLOAD);
      NewOpC = (OpVT == MVT::i1
                ? SPUISD::EXTRACT_I1_ZEXT
                : SPUISD::EXTRACT_I8_ZEXT);

    result = DAG.getNode(NewOpC, OpVT, result);

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };

    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);

    // Misaligned 16-byte load:
    if (basep.getOpcode() == ISD::LOAD) {
      LN = cast<LoadSDNode>(basep);
      if (LN->getAlignment() == 16) {
        // We can verify that we're really loading from a 16-byte aligned
        // chunk. Encapsulate basep as a D-Form address and return a new
        basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
                            DAG.getConstant(0, PtrVT));
          return DAG.getLoad(VT, LN->getChain(), basep,
                             LN->getSrcValue(), LN->getSrcValueOffset(),
                             LN->isVolatile(), 16);
          return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
                                LN->getSrcValue(), LN->getSrcValueOffset(),
                                OpVT, LN->isVolatile(), 16);

    // Catch all other cases where we can't guarantee that we have a
    // 16-byte aligned entity, which means resorting to an X-form
    SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
    SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
    SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);

    ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);

    SDOperand alignLoad =
      DAG.getLoad(opVecVT, LN->getChain(), ptrp,
                  LN->getSrcValue(), LN->getSrcValueOffset(),
                  LN->isVolatile(), 16);

    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);

    // Shuffle the loaded chunk so the addressed element lands in slot 0:
    result = DAG.getNode(SPUISD::SHUFB, opVecVT,
                         DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));

    result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };

    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);

  case ISD::LAST_INDEXED_MODE:
    // Indexed addressing modes are not supported by this lowering:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.

 Strategy: load the enclosing aligned 16-byte chunk, SHUFB the scalar value
 into its byte slot within that chunk, then store the merged chunk back.
 */
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  // For truncating stores, StVT is the (narrower) type actually written.
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand the_chain = SN->getChain();
  //unsigned alignment = SN->getAlignment();
  //const valtype_map_s *vtm = getValueTypeMapEntry(VT);

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDOperand basep = SN->getBasePtr();

    if (basep.getOpcode() == ISD::FrameIndex) {
      // FrameIndex nodes are always properly aligned. Really.

    if (basep.getOpcode() == ISD::ADD) {
      // Split (base + constant offset) so the offset folds into the D-Form
      // address built below.
      const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
             && "LowerSTORE: ISD::ADD operand 1 is not constant");
      offset = unsigned(CN->getValue());
      ptrOp = basep.getOperand(0);
      DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT;

    stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    stVecVT = MVT::v16i8;
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
    // the actual dform addr offs($reg).
    basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
                        DAG.getConstant((offset & ~0xf), PtrVT));

    // Create the 16-byte aligned vector load
    SDOperand alignLoad =
      DAG.getLoad(vecVT, the_chain, basep,
                  SN->getSrcValue(), SN->getSrcValueOffset(),
                  SN->isVolatile(), 16);
    the_chain = alignLoad.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
    SDOperand theValue = SN->getValue();

           && (theValue.getOpcode() == ISD::AssertZext
               || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      theValue = theValue.getOperand(0);

    // Build the shuffle control word that drops the value into its byte slot:
    SDOperand insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
                  DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              DAG.getConstant((offset & 0xf), PtrVT)));

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    // Store the merged 16-byte chunk back to memory:
    result = DAG.getStore(the_chain, result, basep,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

  case ISD::LAST_INDEXED_MODE:
    // NOTE(review): the diagnostic text says "LowerLOAD"/"LoadSDNode", but
    // this is LowerSTORE operating on a StoreSDNode — copy/paste error in
    // the message string (left unchanged here; comments only).
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) SN->getAddressingMode() << "\n";
/// Generate the address of a constant pool entry.
// Static relocation + small memory model: the target constant-pool address
// is usable directly; large memory model: materialize it as a Hi/Lo pair
// that is summed at runtime.  Only static relocation is supported.
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);

         "LowerConstantPool: Relocation model other than static not supported.");
/// Generate the address of a jump-table entry.
// Mirrors LowerConstantPool: small memory model uses the target jump-table
// address directly; large memory model builds a Hi/Lo pair and adds them.
// Only the static relocation model is supported.
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the jump table address in it.
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);

         "LowerJumpTable: Relocation model other than static not supported.");
/// Generate the address of a global value.
// Small memory model: the target global address is a local-store address and
// is usable directly; large memory model: materialize it as a Hi/Lo pair.
// Only the static relocation model is supported.
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Generate a local store address
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);

  cerr << "LowerGlobalAddress: Relocation model other than static not "
//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    // Splat the constant across a v2i64 and pull element 0 back out; the
    // BUILD_VECTOR form is what the SPU can actually materialize.
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

  // Any type other than i64 is unexpected here:
  cerr << "LowerConstant: unhandled constant type "
       << MVT::getValueTypeString(VT)
//! Custom lower single precision floating point constants
/*!
 "float" immediates can be lowered as if they were unsigned 32-bit integers.
 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
 selector.  f64 constants are rebuilt by reinterpreting their bit pattern as
 an i64 and reusing the i64 constant lowering.
 */
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f32) {
    float targetConst = FP->getValueAPF().convertToFloat();
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(targetConst, VT));
  } else if (VT == MVT::f64) {
    // f64: reinterpret the bits as i64 and reuse the i64 constant lowering.
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
//! Lower ISD::FORMAL_ARGUMENTS for the Cell SPU target.
//
// Copies incoming arguments out of the SPU argument registers into fresh
// virtual registers (one register class per value type), falls back to a
// fixed stack-slot load once registers run out (or for varargs), records a
// frame index for llvm.va_start expansion, and returns a MERGE_VALUES node
// carrying one result per formal argument plus the updated chain.
//
// NOTE(review): interior lines of this function (the switch over ObjectVT,
// several braces, the needsLoad assignments) are elided in this view; the
// comments below describe only the visible code.
902 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
904 MachineFunction &MF = DAG.getMachineFunction();
905 MachineFrameInfo *MFI = MF.getFrameInfo();
906 SSARegMap *RegMap = MF.getSSARegMap();
907 SmallVector<SDOperand, 8> ArgValues;
// Operand 0 of a FORMAL_ARGUMENTS node is the incoming chain; operand 2
// is the isVarArg flag.
908 SDOperand Root = Op.getOperand(0);
909 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
911 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
912 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack-passed arguments start just above the minimal ABI linkage area.
914 unsigned ArgOffset = SPUFrameInfo::minStackSize();
915 unsigned ArgRegIdx = 0;
916 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
918 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
920 // Add DAG nodes to load the arguments or copy them out of registers.
// getNumValues()-1: the last result of FORMAL_ARGUMENTS is the chain,
// not a formal argument.
921 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
923 bool needsLoad = false;
924 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
925 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
// Diagnostic for value types with no lowering case below.
929 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
930 << MVT::getValueTypeString(ObjectVT)
// i8 argument: copy out of the next argument register, if available.
// (Varargs arguments always take the stack path.)
935 if (!isVarArg && ArgRegIdx < NumArgRegs) {
936 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
937 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
938 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 argument.
945 if (!isVarArg && ArgRegIdx < NumArgRegs) {
946 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
947 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
948 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 argument.
955 if (!isVarArg && ArgRegIdx < NumArgRegs) {
956 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
957 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
958 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 argument.
965 if (!isVarArg && ArgRegIdx < NumArgRegs) {
966 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
967 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
968 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 argument.
975 if (!isVarArg && ArgRegIdx < NumArgRegs) {
976 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
977 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
978 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 argument.
985 if (!isVarArg && ArgRegIdx < NumArgRegs) {
986 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
987 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
988 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector argument: copied with its own (vector) value type.
999 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1000 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1001 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1002 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1010 // We need to load the argument to a virtual register if we determined above
1011 // that we ran out of physical registers of the appropriate type
1013 // If the argument is actually used, emit a load from the right stack
// hasNUsesOfValue(0, ArgNo): true iff this formal argument has no uses.
1015 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1016 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1017 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1018 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1020 // Don't emit a dead load.
1021 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
// Stack offset advances by a full slot regardless of the object's size.
1024 ArgOffset += StackSlotSize;
1027 ArgValues.push_back(ArgVal);
1030 // If the function takes variable number of arguments, make a frame index for
1031 // the start of the first vararg value... for expansion of llvm.va_start.
1033 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1035 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1036 // If this function is vararg, store any remaining integer argument regs to
1037 // their spots on the stack so that they may be loaded by dereferencing the
1038 // result of va_next.
1039 SmallVector<SDOperand, 8> MemOps;
1040 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1041 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1042 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1043 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1044 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1045 MemOps.push_back(Store);
1046 // Increment the address by four for the next argument to store
1047 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1048 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all vararg spill stores into the chain with a TokenFactor.
1050 if (!MemOps.empty())
1051 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
// Last MERGE_VALUES result is the (possibly updated) chain.
1054 ArgValues.push_back(Root);
1056 // Return the new list of results.
1057 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1058 Op.Val->value_end());
1059 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1062 /// isLSAAddress - Return the immediate to use if the specified
1063 /// value is representable as a LSA address.
///
/// Returns a constant node holding the word-address (byte address >> 2)
/// when Op is a constant that is 4-byte aligned and fits in 18 bits
/// (i.e., the top 14 bits are a sign extension of the immediate);
/// otherwise returns 0.
1064 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1065 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1068 int Addr = C->getValue();
1069 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
// NOTE(review): relies on arithmetic right shift of a signed int
// (implementation-defined in C++ of this era, though universal on the
// compilers LLVM supports) to test sign-extension of the top 14 bits.
1070 (Addr << 14 >> 14) != Addr)
1071 return 0; // Top 14 bits have to be sext of immediate.
1073 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
//! Lower ISD::CALL for the Cell SPU target.
//
// Marshals outgoing arguments into the SPU argument registers (overflow
// goes to fixed stack slots), selects the call form for the callee
// (PC-relative BRSL for locally-defined globals, absolute BRASL for
// declarations/external symbols, munged immediate for literal LSA
// addresses), emits the call node, and copies any return values out of
// R3/R4 into the result list.
//
// NOTE(review): interior lines (switch case labels, some braces and
// else branches) are elided in this view; comments describe only the
// visible code.
1078 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1079 SDOperand Chain = Op.getOperand(0);
// ISD::CALL operands: 0 = chain, 2 = isVarArg, 3 = isTailCall,
// 4 = callee, then (value, flag) pairs for each argument.
1081 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1082 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1084 SDOperand Callee = Op.getOperand(4);
1085 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1086 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1087 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1088 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1090 // Handy pointer type
1091 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1093 // Accumulate how many bytes are to be pushed on the stack, including the
1094 // linkage area, and parameter passing area. According to the SPU ABI,
1095 // we minimally need space for [LR] and [SP]
1096 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1098 // Set up a copy of the stack pointer for use loading and storing any
1099 // arguments that may not fit in the registers available for argument
1101 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1103 // Figure out which arguments are going to go in registers, and which in
1105 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1106 unsigned ArgRegIdx = 0;
1108 // Keep track of registers passing arguments
1109 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1110 // And the arguments passed on the stack
1111 SmallVector<SDOperand, 8> MemOpChains;
1113 for (unsigned i = 0; i != NumOps; ++i) {
1114 SDOperand Arg = Op.getOperand(5+2*i);
1116 // PtrOff will be used to store the current argument to the stack if a
1117 // register cannot be found for it.
1118 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1119 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1121 switch (Arg.getValueType()) {
1122 default: assert(0 && "Unexpected ValueType for argument!");
// Per-type cases (labels elided): register if one remains, else a
// stack store at the current offset.
1126 if (ArgRegIdx != NumArgRegs) {
1127 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1129 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1130 ArgOffset += StackSlotSize;
1135 if (ArgRegIdx != NumArgRegs) {
1136 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1138 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1139 ArgOffset += StackSlotSize;
1146 if (ArgRegIdx != NumArgRegs) {
1147 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1149 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1150 ArgOffset += StackSlotSize;
1156 // Update number of stack bytes actually used, insert a call sequence start
1157 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1158 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1160 if (!MemOpChains.empty()) {
1161 // Adjust the stack pointer for the stack arguments.
1162 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1163 &MemOpChains[0], MemOpChains.size());
1166 // Build a sequence of copy-to-reg nodes chained together with token chain
1167 // and flag operands which copy the outgoing args into the appropriate regs.
1169 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1170 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
// Thread the glue (flag) value through each copy so the register
// copies stay adjacent to the call.
1172 InFlag = Chain.getValue(1);
1175 std::vector<MVT::ValueType> NodeTys;
1176 NodeTys.push_back(MVT::Other); // Returns a chain
1177 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1179 SmallVector<SDOperand, 8> Ops;
1180 unsigned CallOpc = SPUISD::CALL;
1182 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1183 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1184 // node so that legalize doesn't hack it.
1185 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1186 GlobalValue *GV = G->getGlobal();
1187 unsigned CalleeVT = Callee.getValueType();
1189 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1190 // style calls, otherwise, external symbols are BRASL calls.
1192 // This may be an unsafe assumption for JIT and really large compilation
1194 if (GV->isDeclaration()) {
1195 Callee = DAG.getGlobalAddress(GV, CalleeVT);
// Defined in this module: wrap in a PC-relative address node.
1197 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1198 DAG.getTargetGlobalAddress(GV, CalleeVT),
1199 DAG.getConstant(0, PtrVT));
1201 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1202 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1203 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1204 // If this is an absolute destination address that appears to be a legal
1205 // local store address, use the munged value.
1206 Callee = SDOperand(Dest, 0);
1208 Ops.push_back(Chain);
1209 Ops.push_back(Callee);
1211 // Add argument registers to the end of the list so that they are known live
1213 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1214 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1215 RegsToPass[i].second.getValueType()));
1218 Ops.push_back(InFlag);
1219 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1220 InFlag = Chain.getValue(1);
1222 SDOperand ResultVals[3];
1223 unsigned NumResults = 0;
1226 // If the call has results, copy the values out of the ret val registers.
1227 switch (Op.Val->getValueType(0)) {
1228 default: assert(0 && "Unexpected ret value!");
1229 case MVT::Other: break;
// Two i32 results (e.g. an expanded i64): low part in R4, high in R3.
1231 if (Op.Val->getValueType(1) == MVT::i32) {
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1234 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1235 Chain.getValue(2)).getValue(1);
1236 ResultVals[1] = Chain.getValue(0);
1238 NodeTys.push_back(MVT::i32);
// Single i32 result in R3.
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1244 NodeTys.push_back(MVT::i32);
// i64 result in R3.
1247 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1248 ResultVals[0] = Chain.getValue(0);
1250 NodeTys.push_back(MVT::i64);
// Floating-point result in R3 (case labels elided).
1254 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1255 InFlag).getValue(1);
1256 ResultVals[0] = Chain.getValue(0);
1258 NodeTys.push_back(Op.Val->getValueType(0));
// Vector result in R3 (case labels elided).
1265 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1266 InFlag).getValue(1);
1267 ResultVals[0] = Chain.getValue(0);
1269 NodeTys.push_back(Op.Val->getValueType(0));
1273 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1274 DAG.getConstant(NumStackBytes, PtrVT));
1275 NodeTys.push_back(MVT::Other);
1277 // If the function returns void, just return the chain.
1278 if (NumResults == 0)
1281 // Otherwise, merge everything together with a MERGE_VALUES node.
1282 ResultVals[NumResults++] = Chain;
1283 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1284 ResultVals, NumResults);
1285 return Res.getValue(Op.ResNo);
//! Lower ISD::RET: copy return values to their ABI registers.
//
// Runs the RetCC_SPU calling-convention analysis over the RET node,
// marks the return registers live-out for the function (first return
// lowered only), copies each returned value into its assigned register
// glued to the chain, and emits a SPUISD::RET_FLAG node (with the glue
// operand when at least one value was copied).
1289 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1290 SmallVector<CCValAssign, 16> RVLocs;
1291 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1292 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1293 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1294 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1296 // If this is the first return lowered for this function, add the regs to the
1297 // liveout set for the function.
1298 if (DAG.getMachineFunction().liveout_empty()) {
1299 for (unsigned i = 0; i != RVLocs.size(); ++i)
1300 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1303 SDOperand Chain = Op.getOperand(0);
1306 // Copy the result values into the output registers.
1307 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1308 CCValAssign &VA = RVLocs[i];
1309 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, value, flag) pairs; value i lives at 2*i+1.
1310 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1311 Flag = Chain.getValue(1);
// With copies emitted, glue the return to the last copy via Flag.
1315 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1317 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1321 //===----------------------------------------------------------------------===//
1322 // Vector related lowering:
1323 //===----------------------------------------------------------------------===//
/// getVecImm - If N is a BUILD_VECTOR whose defined (non-undef) elements
/// are all the same ConstantSDNode, return that constant; otherwise
/// return 0 (also returned when every element is undef, so an implicit
/// def can be used instead of a constant).
1325 static ConstantSDNode *
1326 getVecImm(SDNode *N) {
1327 SDOperand OpVal(0, 0);
1329 // Check to see if this buildvec has a single non-undef value in its elements.
1330 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1331 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// First defined element seen: remember it (guard line elided).
1333 OpVal = N->getOperand(i);
1334 else if (OpVal != N->getOperand(i))
1338 if (OpVal.Val != 0) {
1339 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1344 return 0; // All UNDEF: use implicit def.; not Constant node
1347 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1348 /// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant (suitable for the SPU "il*" 18-bit immediate forms); otherwise
/// fall through (failure return elided in this view).
1350 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1351 MVT::ValueType ValueType) {
1352 if (ConstantSDNode *CN = getVecImm(N)) {
1353 uint64_t Value = CN->getValue();
// 0x3ffff == 2^18 - 1: largest unsigned 18-bit immediate.
1354 if (Value <= 0x3ffff)
1355 return DAG.getConstant(Value, ValueType);
1361 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1362 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant; the check is performed in the width selected by ValueType
/// (i32, i16 or i64). Failure return elided in this view.
1364 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1365 MVT::ValueType ValueType) {
1366 if (ConstantSDNode *CN = getVecImm(N)) {
1367 if (ValueType == MVT::i32) {
1368 int Value = (int) CN->getValue();
// Sign-extend the low 16 bits and compare against the original to
// verify the value is representable as a signed 16-bit immediate.
1369 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1371 if (Value == SExtValue)
1372 return DAG.getConstant(Value, ValueType);
1373 } else if (ValueType == MVT::i16) {
1374 short Value = (short) CN->getValue();
1375 int SExtValue = ((int) Value << 16) >> 16;
1377 if (Value == (short) SExtValue)
1378 return DAG.getConstant(Value, ValueType);
1379 } else if (ValueType == MVT::i64) {
1380 int64_t Value = CN->getValue();
// Same sign-extension test, performed at 64-bit width.
1381 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1383 if (Value == SExtValue)
1384 return DAG.getConstant(Value, ValueType);
1391 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1392 /// and the value fits into a signed 10-bit constant, and if so, return the
/// constant (the SPU "*i10" instruction forms take a signed 10-bit
/// immediate). Failure return elided in this view.
1394 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1395 MVT::ValueType ValueType) {
1396 if (ConstantSDNode *CN = getVecImm(N)) {
1397 int Value = (int) CN->getValue();
// isS10Constant is checked at the width implied by ValueType.
1398 if ((ValueType == MVT::i32 && isS10Constant(Value))
1399 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1400 return DAG.getConstant(Value, ValueType);
1406 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1407 /// and the value fits into a signed 8-bit constant, and if so, return the
/// 8-bit constant.
1410 /// @note: The incoming vector is v16i8 because that's the only way we can load
1411 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same (an i16 splat of equal bytes is really an i8 splat).
1413 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1414 MVT::ValueType ValueType) {
1415 if (ConstantSDNode *CN = getVecImm(N)) {
1416 int Value = (int) CN->getValue();
1417 if (ValueType == MVT::i16
1418 && Value <= 0xffff /* truncated from uint64_t */
// Upper byte must equal lower byte for the i16 value to be an i8 splat.
1419 && ((short) Value >> 8) == ((short) Value & 0xff))
1420 return DAG.getConstant(Value & 0xff, ValueType);
1421 else if (ValueType == MVT::i8
1422 && (Value & 0xff) == Value)
1423 return DAG.getConstant(Value, ValueType);
1429 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1430 /// and the value fits into a signed 16-bit constant, and if so, return the
/// value shifted down 16 bits, as the immediate for an ILHU
/// (immediate-load-halfword-upper) instruction.
1432 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1433 MVT::ValueType ValueType) {
1434 if (ConstantSDNode *CN = getVecImm(N)) {
1435 uint64_t Value = CN->getValue();
// Accept values whose only set bits are in bits 16-31.
1436 if ((ValueType == MVT::i32
1437 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
// NOTE(review): for i64 this mask only constrains the low 32 bits of
// the splat value; whether the upper word is intentionally allowed to
// be zero-only here should be confirmed against the ILHU semantics.
1438 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1439 return DAG.getConstant(Value >> 16, ValueType);
1445 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
///
/// Returns the splatted value as an i32 constant if the build_vector is
/// a uniform constant splat; otherwise falls through (failure return
/// elided in this view).
1446 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1447 if (ConstantSDNode *CN = getVecImm(N)) {
1448 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1454 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
///
/// Returns the splatted value as an i64 constant if the build_vector is
/// a uniform constant splat; otherwise falls through (failure return
/// elided in this view).
1455 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1456 if (ConstantSDNode *CN = getVecImm(N)) {
// Preserve all 64 bits of the splat constant. The previous cast
// through 'unsigned' truncated the value to its low 32 bits before it
// was widened back to MVT::i64, corrupting any 64-bit splat whose
// upper word was nonzero.
1457 return DAG.getConstant((uint64_t) CN->getValue(), MVT::i64);
1463 // If this is a vector of constants or undefs, get the bits. A bit in
1464 // UndefBits is set if the corresponding element of the vector is an
1465 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1466 // zero. Return true if this is not an array of constants, false if it is.
//
// VectorBits[0]/UndefBits[0] hold the first 64 bits of the 128-bit
// vector, [1] the second 64 bits; each element's bits are OR'd into its
// slot within the appropriate half.
1468 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1469 uint64_t UndefBits[2]) {
1470 // Start with zero'd results.
1471 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1473 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1474 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1475 SDOperand OpVal = BV->getOperand(i);
1477 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1478 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1480 uint64_t EltBits = 0;
1481 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit of this element as undef in UndefBits.
1482 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1483 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1485 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
// Mask the constant down to the element's width.
1486 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1487 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw IEEE bit pattern.
1488 const APFloat &apf = CN->getValueAPF();
1489 EltBits = (CN->getValueType(0) == MVT::f32
1490 ? FloatToBits(apf.convertToFloat())
1491 : DoubleToBits(apf.convertToDouble()));
1493 // Nonconstant element.
1497 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1500 //printf("%llx %llx %llx %llx\n",
1501 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1505 /// If this is a splat (repetition) of a value across the whole vector, return
1506 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1507 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1508 /// SplatSize = 1 byte.
///
/// Works by repeatedly folding the value in half (64 -> 32 -> 16 -> 8
/// bits), at each level checking that the two halves agree on every bit
/// not covered by an undef. MinSplatBits stops the descent so callers
/// never get a splat narrower than their element type.
/// NOTE(review): the SplatSize assignments and some closing braces are
/// elided in this view.
1509 static bool isConstantSplat(const uint64_t Bits128[2],
1510 const uint64_t Undef128[2],
1512 uint64_t &SplatBits, uint64_t &SplatUndef,
1514 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1515 // the same as the lower 64-bits, ignoring undefs.
1516 uint64_t Bits64 = Bits128[0] | Bits128[1];
1517 uint64_t Undef64 = Undef128[0] & Undef128[1];
1518 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1519 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1520 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1521 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// The two 64-bit halves must agree wherever neither half is undef.
1523 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1524 if (MinSplatBits < 64) {
1526 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1528 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1529 if (MinSplatBits < 32) {
1531 // If the top 16-bits are different than the lower 16-bits, ignoring
1532 // undefs, we have an i32 splat.
1533 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1534 if (MinSplatBits < 16) {
1535 // If the top 8-bits are different than the lower 8-bits, ignoring
1536 // undefs, we have an i16 splat.
1537 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1538 // Otherwise, we have an 8-bit splat.
1539 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1540 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1546 SplatUndef = Undef16;
1553 SplatUndef = Undef32;
// 64-bit splat: report the first half verbatim.
1559 SplatBits = Bits128[0];
1560 SplatUndef = Undef128[0];
1566 return false; // Can't be a splat if two pieces don't match.
1569 // If this is a case we can't handle, return null and let the default
1570 // expansion code take care of it. If we CAN select this case, and if it
1571 // selects to a single instruction, return Op. Otherwise, if we can codegen
1572 // this case more efficiently than a constant pool load, lower it to the
1573 // sequence of ops that should be used.
//
// Only constant splats are handled here; everything else falls back to
// default expansion. NOTE(review): the switch over VT and several case
// labels/braces are elided in this view — comments mark which vector
// type each visible fragment belongs to.
1574 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1575 MVT::ValueType VT = Op.getValueType();
1576 // If this is a vector of constants or undefs, get the bits. A bit in
1577 // UndefBits is set if the corresponding element of the vector is an
1578 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1580 uint64_t VectorBits[2];
1581 uint64_t UndefBits[2];
1582 uint64_t SplatBits, SplatUndef;
1584 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1585 || !isConstantSplat(VectorBits, UndefBits,
1586 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1587 SplatBits, SplatUndef, SplatSize))
1588 return SDOperand(); // Not a constant vector, not a splat.
// v4f32 splat: emit the bits as an integer splat, then bitcast.
1593 uint32_t Value32 = SplatBits;
1594 assert(SplatSize == 4
1595 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1596 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1597 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1598 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1599 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 splat: same trick via v2i64.
1603 uint64_t f64val = SplatBits;
1604 assert(SplatSize == 8
1605 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1606 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1607 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1608 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1609 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 splat:
1613 // 8-bit constants have to be expanded to 16-bits
1614 unsigned short Value16 = SplatBits | (SplatBits << 8);
1616 for (int i = 0; i < 8; ++i)
1617 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1618 return DAG.getNode(ISD::BIT_CONVERT, VT,
1619 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 splat (an 8-bit splat is widened by duplicating the byte):
1622 unsigned short Value16;
1624 Value16 = (unsigned short) (SplatBits & 0xffff);
1626 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1627 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1629 for (int i = 0; i < 8; ++i) Ops[i] = T;
1630 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 splat:
1633 unsigned int Value = SplatBits;
1634 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1635 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 splat: synthesized from 32-bit halves plus a byte shuffle.
1638 uint64_t val = SplatBits;
1639 uint32_t upper = uint32_t(val >> 32);
1640 uint32_t lower = uint32_t(val);
1645 SmallVector<SDOperand, 16> ShufBytes;
1647 bool upper_special, lower_special;
1649 // NOTE: This code creates common-case shuffle masks that can be easily
1650 // detected as common expressions. It is not attempting to create highly
1651 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1653 // Detect if the upper or lower half is a special shuffle mask pattern:
1654 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1655 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1657 // Create lower vector if not a special pattern
1658 if (!lower_special) {
1659 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1660 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1661 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1662 LO32C, LO32C, LO32C, LO32C));
1665 // Create upper vector if not a special pattern
1666 if (!upper_special) {
1667 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1668 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1669 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1670 HI32C, HI32C, HI32C, HI32C));
1673 // If either upper or lower are special, then the two input operands are
1674 // the same (basically, one of them is a "don't care")
1679 if (lower_special && upper_special) {
1680 // Unhappy situation... both upper and lower are special, so punt with
1681 // a target constant:
1682 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1683 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shuffle mask: even words select from HI32, odd
// words from LO32; special halves are encoded as SHUFB's generated
// constants (0x80 = 0x00 byte, 0xc0 = 0xff byte, 0xe0 = 0x80 byte).
1687 for (int i = 0; i < 4; ++i) {
1688 for (int j = 0; j < 4; ++j) {
1690 bool process_upper, process_lower;
1693 process_upper = (upper_special && (i & 1) == 0);
1694 process_lower = (lower_special && (i & 1) == 1);
1696 if (process_upper || process_lower) {
1697 if ((process_upper && upper == 0)
1698 || (process_lower && lower == 0))
1700 else if ((process_upper && upper == 0xffffffff)
1701 || (process_lower && lower == 0xffffffff))
1703 else if ((process_upper && upper == 0x80000000)
1704 || (process_lower && lower == 0x80000000))
1705 val = (j == 0 ? 0xe0 : 0x80);
1707 val = i * 4 + j + ((i & 1) * 16);
1709 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1713 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1714 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1715 &ShufBytes[0], ShufBytes.size()));
1717 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1718 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1719 return DAG.getNode(ISD::BIT_CONVERT, VT,
1720 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1721 Zero, Zero, Zero, Zero));
1729 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1730 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1731 /// permutation vector, V3, is monotonically increasing with one "exception"
1732 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1733 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1734 /// In either case, the net result is going to eventually invoke SHUFB to
1735 /// permute/shuffle the bytes from V1 and V2.
1737 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1738 /// control word for byte/halfword/word insertion. This takes care of a single
1739 /// element move from V2 into V1.
1741 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
///
/// NOTE(review): several interior lines (the V2EltIdx0 assignments, loop
/// increment, some braces) are elided in this view.
1742 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1743 SDOperand V1 = Op.getOperand(0);
1744 SDOperand V2 = Op.getOperand(1);
1745 SDOperand PermMask = Op.getOperand(2);
// An undef second operand shuffles within V1 alone.
1747 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1749 // If we have a single element being moved from V1 to V2, this can be handled
1750 // using the C*[DX] compute mask instructions, but the vector elements have
1751 // to be monotonically increasing with one exception element.
1752 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1753 unsigned EltsFromV2 = 0;
1755 unsigned V2EltIdx0 = 0;
1756 unsigned CurrElt = 0;
1757 bool monotonic = true;
// V2EltIdx0 becomes the mask index of V2's first element; the
// assignments per element type are elided here.
1758 if (EltVT == MVT::i8)
1760 else if (EltVT == MVT::i16)
1762 else if (EltVT == MVT::i32)
1765 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; bail as soon as more than one element comes from V2
// or the V1 indices stop increasing monotonically.
1767 for (unsigned i = 0, e = PermMask.getNumOperands();
1768 EltsFromV2 <= 1 && monotonic && i != e;
1771 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1774 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1776 if (SrcElt >= V2EltIdx0) {
// Element drawn from V2: remember its byte offset for INSERT_MASK.
1778 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1779 } else if (CurrElt != SrcElt) {
1786 if (EltsFromV2 == 1 && monotonic) {
1787 // Compute mask and shuffle
1788 MachineFunction &MF = DAG.getMachineFunction();
1789 SSARegMap *RegMap = MF.getSSARegMap();
1790 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1791 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1792 // Initialize temporary register to 0
1793 SDOperand InitTempReg =
1794 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1795 // Copy register's contents as index in INSERT_MASK:
1796 SDOperand ShufMaskOp =
1797 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1798 DAG.getTargetConstant(V2Elt, MVT::i32),
1799 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1800 // Use shuffle mask in SHUFB synthetic instruction:
1801 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1803 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1804 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1806 SmallVector<SDOperand, 16> ResultMask;
1807 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1809 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1812 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1814 for (unsigned j = 0; j != BytesPerElement; ++j) {
1815 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1820 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1821 &ResultMask[0], ResultMask.size());
1822 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
/// LowerSCALAR_TO_VECTOR - Lower ISD::SCALAR_TO_VECTOR.
///
/// A constant scalar becomes a BUILD_VECTOR that replicates the value
/// into every element (which later folds into a vector register load);
/// any other scalar is promoted into a vector register via the
/// SPUISD::PROMOTE_SCALAR synthetic node.
1826 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1827 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1829 if (Op0.Val->getOpcode() == ISD::Constant) {
1830 // For a constant, build the appropriate constant vector, which will
1831 // eventually simplify to a vector register load.
1833 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1834 SmallVector<SDOperand, 16> ConstVecValues;
1838 // Create a constant vector:
// Pick the element count and element type from the result vector type.
1839 switch (Op.getValueType()) {
1840 default: assert(0 && "Unexpected constant value type in "
1841 "LowerSCALAR_TO_VECTOR");
1842 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1843 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1844 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1845 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1846 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1847 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1850 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1851 for (size_t j = 0; j < n_copies; ++j)
1852 ConstVecValues.push_back(CValue);
1854 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1855 &ConstVecValues[0], ConstVecValues.size());
1857 // Otherwise, copy the value from one register to another:
1858 switch (Op0.getValueType()) {
1859 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1866 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1873 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1874 switch (Op.getValueType()) {
1876 SDOperand rA = Op.getOperand(0);
1877 SDOperand rB = Op.getOperand(1);
1878 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1879 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1880 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1881 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1883 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1887 // Multiply two v8i16 vectors (pipeline friendly version):
1888 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1889 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1890 // c) Use SELB to select upper and lower halves from the intermediate results
1892 // NOTE: We really want to move the FSMBI to earlier to actually get the
1893 // dual-issue. This code does manage to do this, even if it's a little on
1896 MachineFunction &MF = DAG.getMachineFunction();
1897 SSARegMap *RegMap = MF.getSSARegMap();
1898 SDOperand Chain = Op.getOperand(0);
1899 SDOperand rA = Op.getOperand(0);
1900 SDOperand rB = Op.getOperand(1);
1901 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1902 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1905 DAG.getCopyToReg(Chain, FSMBIreg,
1906 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1907 DAG.getConstant(0xcccc, MVT::i32)));
1910 DAG.getCopyToReg(FSMBOp, HiProdReg,
1911 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1913 SDOperand HHProd_v4i32 =
1914 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1915 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1917 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1918 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1919 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1920 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1922 DAG.getConstant(16, MVT::i16))),
1923 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1926 // This M00sE is N@stI! (apologies to Monty Python)
1928 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1929 // is to break it all apart, sign extend, and reassemble the various
1930 // intermediate products.
1932 MachineFunction &MF = DAG.getMachineFunction();
1933 SSARegMap *RegMap = MF.getSSARegMap();
1934 SDOperand Chain = Op.getOperand(0);
1935 SDOperand rA = Op.getOperand(0);
1936 SDOperand rB = Op.getOperand(1);
1937 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1938 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1940 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1941 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1942 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1945 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1946 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1947 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1949 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1951 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1954 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1955 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1957 SDOperand FSMBdef_2222 =
1958 DAG.getCopyToReg(Chain, FSMBreg_2222,
1959 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1960 DAG.getConstant(0x2222, MVT::i32)));
1962 SDOperand FSMBuse_2222 =
1963 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1965 SDOperand LoProd_1 =
1966 DAG.getCopyToReg(Chain, LoProd_reg,
1967 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1970 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1973 DAG.getNode(ISD::AND, MVT::v4i32,
1974 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1975 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1976 LoProdMask, LoProdMask,
1977 LoProdMask, LoProdMask));
1980 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1981 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1984 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1985 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1988 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1990 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1992 SDOperand HHProd_1 =
1993 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1994 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1995 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1996 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1997 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2000 DAG.getCopyToReg(Chain, HiProd_reg,
2001 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2003 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2007 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2008 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2010 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2011 DAG.getNode(ISD::OR, MVT::v4i32,
2016 cerr << "CellSPU: Unknown vector multiplication, got "
2017 << MVT::getValueTypeString(Op.getValueType())
// Custom-lower single-precision FDIV (f32, and presumably v4f32 — the else
// branch allocates VECREG virtual registers) using the SPU estimate/refine
// idiom: BRcpl ~= 1/B via FPRecipEst then FPInterp, followed by one
// refinement step: result = A*BRcpl + BRcpl*(A - B*(A*BRcpl)).
// NOTE(review): this listing is elided (e.g. the `else {` around line 2039,
// the declarations of BRcpl/AxBRcpl, and the closing brace are not visible);
// comments below describe only the visible lines.
2026 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2027 MachineFunction &MF = DAG.getMachineFunction();
2028 SSARegMap *RegMap = MF.getSSARegMap();
2030 SDOperand A = Op.getOperand(0);
2031 SDOperand B = Op.getOperand(1);
2032 unsigned VT = Op.getValueType();
2034 unsigned VRegBR, VRegC;
// Scalar f32 lives in the 32-bit FP register class; the (elided) else branch
// uses the 128-bit vector register class instead.
2036 if (VT == MVT::f32) {
2037 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2038 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2040 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2041 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2043 // TODO: make sure we're feeding FPInterp the right arguments
2044 // Right now: fi B, frest(B)
// BRcpl (def elided from this listing) is the refined reciprocal estimate,
// pinned into VRegBR via CopyToReg so it can be reused below:
2047 // (Floating Interpolate (FP Reciprocal Estimate B))
2049 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2050 DAG.getNode(SPUISD::FPInterp, VT, B,
2051 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2053 // Computes A * BRcpl and stores in a temporary register
2055 DAG.getCopyToReg(BRcpl, VRegC,
2056 DAG.getNode(ISD::FMUL, VT, A,
2057 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2058 // What's the Chain variable do? It's magic!
2059 // TODO: set Chain = Op(0).getEntryNode()
// Final refinement: AxBRcpl + BRcpl*(A - B*AxBRcpl), where AxBRcpl is read
// back from VRegC and BRcpl from VRegBR.
2061 return DAG.getNode(ISD::FADD, VT,
2062 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2063 DAG.getNode(ISD::FMUL, VT,
2064 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2065 DAG.getNode(ISD::FSUB, VT, A,
2066 DAG.getNode(ISD::FMUL, VT, B,
2067 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2070 // Expands double-precision FDIV
2071 // Expects two doubles as inputs X and Y, does a floating point
2072 // reciprocal estimate, and three iterations of Newton-Raphson
2073 // to increase accuracy.
2074 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2075 // MachineFunction &MF = DAG.getMachineFunction();
2076 // SSARegMap *RegMap = MF.getSSARegMap();
2078 // SDOperand X = Op.getOperand(0);
2079 // SDOperand Y = Op.getOperand(1);
// Custom-lower EXTRACT_VECTOR_ELT: the element index must be a compile-time
// constant. Element 0 of i32/i64 already sits in the SPU "preferred slot",
// so it is extracted directly; otherwise a 16-byte SHUFB mask is built that
// rotates the requested element into the preferred slot before EXTRACT_ELT0.
// NOTE(review): the switch statement selecting prefslot_begin/end by VT is
// elided from this listing — only its case bodies are visible.
2082 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2083 unsigned VT = Op.getValueType();
2084 SDOperand N = Op.getOperand(0);
2085 SDOperand Elt = Op.getOperand(1);
2086 SDOperand ShufMask[16];
2087 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2089 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2091 int EltNo = (int) C->getValue();
// Bounds-check the element number against the lane count for each type:
2094 if (VT == MVT::i8 && EltNo >= 16)
2095 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2096 else if (VT == MVT::i16 && EltNo >= 8)
2097 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2098 else if (VT == MVT::i32 && EltNo >= 4)
2099 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2100 else if (VT == MVT::i64 && EltNo >= 2)
2101 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2103 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2104 // i32 and i64: Element 0 is the preferred slot
2105 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2108 // Need to generate shuffle mask and extract:
2109 int prefslot_begin = -1, prefslot_end = -1;
2110 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// (Elided switch on VT) — preferred-slot byte range per scalar type:
// i8 -> byte 3 only; i16 -> bytes 2..3; i32 -> bytes 0..3; i64 -> bytes 0..7.
2114 prefslot_begin = prefslot_end = 3;
2118 prefslot_begin = 2; prefslot_end = 3;
2122 prefslot_begin = 0; prefslot_end = 3;
2126 prefslot_begin = 0; prefslot_end = 7;
2131 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2132 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte shuffle mask: bytes inside the preferred slot select the
// requested element's bytes; bytes past it just repeat earlier mask entries
// (their contents don't matter after EXTRACT_ELT0).
2134 for (int i = 0; i < 16; ++i) {
2135 // zero fill uppper part of preferred slot, don't care about the
2137 unsigned int mask_val;
2139 if (i <= prefslot_end) {
2141 ((i < prefslot_begin)
2143 : elt_byte + (i - prefslot_begin));
2145 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2147 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2150 SDOperand ShufMaskVec =
2151 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2153 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle the element into the preferred slot, then extract it:
2155 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2156 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2157 N, N, ShufMaskVec));
// Custom-lower INSERT_VECTOR_ELT: requires a constant index. The scalar is
// promoted to a vector and merged into VecOp with SHUFB, using an
// INSERT_MASK (cwd/chd/cwx-style control word) generated from an address
// computed relative to register $2.
// NOTE(review): this listing is elided — the VecOp operand of the SHUFB, the
// trailing arguments of the ADD/getConstant, and the closing of the function
// are not visible.
2161 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2162 SDOperand VecOp = Op.getOperand(0);
2163 SDOperand ValOp = Op.getOperand(1);
2164 SDOperand IdxOp = Op.getOperand(2);
2165 MVT::ValueType VT = Op.getValueType();
2167 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2168 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2170 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2171 // Use $2 because it's always 16-byte aligned and it's available:
2172 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2175 DAG.getNode(SPUISD::SHUFB, VT,
2176 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2178 DAG.getNode(SPUISD::INSERT_MASK, VT,
2179 DAG.getNode(ISD::ADD, PtrVT,
2181 DAG.getConstant(CN->getValue(),
// Custom-lower i8 arithmetic that the SPU cannot do natively: promote the
// operands to i16, perform the i16 operation `Opc`, and TRUNCATE back to i8.
// Non-constant operands are sign- or zero-extended depending on the (elided)
// case; constants are rebuilt directly as i16 constants.
// NOTE(review): the switch statement and its case labels are elided from
// this listing — which opcode each promoted sequence belongs to cannot be
// confirmed from the visible lines; the hints below are inferences.
2187 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2188 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2190 assert(Op.getValueType() == MVT::i8);
// (Elided switch on Opc; default:) unhandled opcodes abort:
2193 assert(0 && "Unhandled i8 math operator");
2197 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2199 SDOperand N1 = Op.getOperand(1);
2200 N0 = (N0.getOpcode() != ISD::Constant
2201 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2202 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16))
2203 N1 = (N1.getOpcode() != ISD::Constant
2204 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2205 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2206 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2207 DAG.getNode(Opc, MVT::i16, N0, N1));
// (Elided case — presumably a rotate: the byte is replicated into both
// halves of an i16 via OR(N0, SHL(N0, 8)) so a 16-bit rotate behaves like
// an 8-bit rotate. TODO confirm against the elided case label.)
2211 SDOperand N1 = Op.getOperand(1);
2213 N0 = (N0.getOpcode() != ISD::Constant
2214 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2215 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2216 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2217 N1 = (N1.getOpcode() != ISD::Constant
2218 ? DAG.getNode(N1Opc, MVT::i16, N1)
2219 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2220 SDOperand ExpandArg =
2221 DAG.getNode(ISD::OR, MVT::i16, N0,
2222 DAG.getNode(ISD::SHL, MVT::i16,
2223 N0, DAG.getConstant(8, MVT::i16)));
2224 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2225 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// (Elided case — unsigned promotion, e.g. a logical shift: zero-extend both
// operands, normalizing the shift amount's type to i16 first.)
2229 SDOperand N1 = Op.getOperand(1);
2231 N0 = (N0.getOpcode() != ISD::Constant
2232 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2233 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2234 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2235 N1 = (N1.getOpcode() != ISD::Constant
2236 ? DAG.getNode(N1Opc, MVT::i16, N1)
2237 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2238 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2239 DAG.getNode(Opc, MVT::i16, N0, N1));
// (Elided case — signed promotion, e.g. an arithmetic shift.)
2242 SDOperand N1 = Op.getOperand(1);
2244 N0 = (N0.getOpcode() != ISD::Constant
2245 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2246 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2247 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2248 N1 = (N1.getOpcode() != ISD::Constant
2249 ? DAG.getNode(N1Opc, MVT::i16, N1)
2250 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2251 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2252 DAG.getNode(Opc, MVT::i16, N0, N1));
// (Elided case — another signed-promotion op, e.g. i8 multiply.)
2255 SDOperand N1 = Op.getOperand(1);
2257 N0 = (N0.getOpcode() != ISD::Constant
2258 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2259 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2260 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2261 N1 = (N1.getOpcode() != ISD::Constant
2262 ? DAG.getNode(N1Opc, MVT::i16, N1)
2263 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2264 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2265 DAG.getNode(Opc, MVT::i16, N0, N1));
2273 //! Lower byte immediate operations for v16i8 vectors:
// Recognizes AND/OR/XOR (per the caller's "ANDBI, ORBI and XORBI" note)
// where one operand is a constant BUILD_VECTOR (possibly behind a
// BIT_CONVERT), checks that it is a uniform byte splat, and rebuilds the
// operation with the splat byte as 16 target constants so instruction
// selection can match the byte-immediate forms.
// NOTE(review): elided from this listing: the declarations of ConstVec/Arg,
// several closing braces, the SplatSize declaration, the tcVec[i] = tc
// assignment in the fill loop, and the function's fall-through return.
2275 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2278 MVT::ValueType VT = Op.getValueType();
// Assume the constant vector is operand 0; if not, swap to operand 1 below.
2280 ConstVec = Op.getOperand(0);
2281 Arg = Op.getOperand(1);
2282 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2283 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2284 ConstVec = ConstVec.getOperand(0);
2286 ConstVec = Op.getOperand(1);
2287 Arg = Op.getOperand(0);
2288 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2289 ConstVec = ConstVec.getOperand(0);
2294 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2295 uint64_t VectorBits[2];
2296 uint64_t UndefBits[2];
2297 uint64_t SplatBits, SplatUndef;
// Only proceed if the build-vector is fully constant AND is a splat of a
// single element value:
2300 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2301 && isConstantSplat(VectorBits, UndefBits,
2302 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2303 SplatBits, SplatUndef, SplatSize)) {
2304 SDOperand tcVec[16];
2305 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2306 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2308 // Turn the BUILD_VECTOR into a set of target constants:
2309 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same logical op with the immediate-friendly constant vector:
2312 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2313 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2320 //! Lower i32 multiplication
// SPU has no full 32x32 multiply; synthesize it from 16-bit multiplies:
// rA*rB = MPYU(lo halves) + (MPYH(rA,rB) + MPYH(rB,rA)), where MPYH forms
// the cross high-half partial products. Non-i32 types abort via the
// (elided) default branch that prints the value type.
// NOTE(review): the switch on VT and the surrounding abort() are elided from
// this listing.
2321 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2325 cerr << "CellSPU: Unknown LowerMUL value type, got "
2326 << MVT::getValueTypeString(Op.getValueType())
2332 SDOperand rA = Op.getOperand(0);
2333 SDOperand rB = Op.getOperand(1);
2335 return DAG.getNode(ISD::ADD, MVT::i32,
2336 DAG.getNode(ISD::ADD, MVT::i32,
2337 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2338 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA),
2339 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2346 //! Custom lowering for CTPOP (count population)
2348 Custom lowering code that counts the number ones in the input
2349 operand. SPU has such an instruction, but it counts the number of
2350 ones per byte, which then have to be accumulated.
// Strategy per type (case labels are elided from this listing):
// i8: CNTB directly gives the answer for a single byte — extract lane 0.
// i16: CNTB, then fold the two byte-counts: (x >> 8) + x, masked to 0x0f.
// i32: CNTB, then two shift-add folds (>>16 then >>8), masked to 0xff.
// Intermediate results are pinned in virtual registers (CopyToReg /
// CopyFromReg) so each is reused rather than recomputed.
2352 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2353 unsigned VT = Op.getValueType();
// Widen the scalar type to a full 128-bit vector type for CNTB:
2354 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// (Elided: switch on VT; case MVT::i8:)
2358 SDOperand N = Op.getOperand(0);
2359 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2361 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2362 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2364 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// (Elided: case MVT::i16:)
2368 MachineFunction &MF = DAG.getMachineFunction();
2369 SSARegMap *RegMap = MF.getSSARegMap();
2371 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2373 SDOperand N = Op.getOperand(0);
2374 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2375 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2376 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2378 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2379 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2381 // CNTB_result becomes the chain to which all of the virtual registers
2382 // CNTB_reg, SUM1_reg become associated:
2383 SDOperand CNTB_result =
2384 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2386 SDOperand CNTB_rescopy =
2387 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2389 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
// Fold the two per-byte counts into the low nibble; trailing operands of
// the ADD/SRL are elided from this listing.
2391 return DAG.getNode(ISD::AND, MVT::i16,
2392 DAG.getNode(ISD::ADD, MVT::i16,
2393 DAG.getNode(ISD::SRL, MVT::i16,
// (Elided: case MVT::i32:)
2400 MachineFunction &MF = DAG.getMachineFunction();
2401 SSARegMap *RegMap = MF.getSSARegMap();
2403 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2404 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2406 SDOperand N = Op.getOperand(0);
2407 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2408 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2409 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2410 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2412 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2413 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2415 // CNTB_result becomes the chain to which all of the virtual registers
2416 // CNTB_reg, SUM1_reg become associated:
2417 SDOperand CNTB_result =
2418 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2420 SDOperand CNTB_rescopy =
2421 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First fold: add the upper-halfword byte counts onto the lower ones.
2424 DAG.getNode(ISD::SRL, MVT::i32,
2425 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1),
2428 DAG.getNode(ISD::ADD, MVT::i32,
2429 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2431 SDOperand Sum1_rescopy =
2432 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second fold: add the remaining adjacent byte counts (shift by 8).
2435 DAG.getNode(ISD::SRL, MVT::i32,
2436 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2439 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2440 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
// The final popcount fits in the low byte:
2442 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2452 /// LowerOperation - Provide custom lowering hooks for some operations.
// Central dispatch for every opcode this target marked "Custom": forwards
// each SDOperand to its Lower* helper. The default branch dumps the
// offending node to cerr and (per the elided lines) aborts.
// NOTE(review): many case labels (LOAD, STORE, Constant, CALL, RET, the i8
// math ops, AND/OR/XOR, MUL, FDIV, CTPOP) and the final return are elided
// from this listing; only the call sites remain visible.
2455 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2457 switch (Op.getOpcode()) {
2459 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2460 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2461 cerr << "*Op.Val:\n";
// Memory and address-materialization nodes are subtarget-dependent:
2468 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2470 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2471 case ISD::ConstantPool:
2472 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2473 case ISD::GlobalAddress:
2474 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2475 case ISD::JumpTable:
2476 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2478 return LowerConstant(Op, DAG);
2479 case ISD::ConstantFP:
2480 return LowerConstantFP(Op, DAG);
2481 case ISD::FORMAL_ARGUMENTS:
2482 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2484 return LowerCALL(Op, DAG);
2486 return LowerRET(Op, DAG, getTargetMachine());
// (Elided i8 arithmetic cases funnel into LowerI8Math:)
2495 return LowerI8Math(Op, DAG, Op.getOpcode());
2497 // Vector-related lowering.
2498 case ISD::BUILD_VECTOR:
2499 return LowerBUILD_VECTOR(Op, DAG);
2500 case ISD::SCALAR_TO_VECTOR:
2501 return LowerSCALAR_TO_VECTOR(Op, DAG);
2502 case ISD::VECTOR_SHUFFLE:
2503 return LowerVECTOR_SHUFFLE(Op, DAG);
2504 case ISD::EXTRACT_VECTOR_ELT:
2505 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2506 case ISD::INSERT_VECTOR_ELT:
2507 return LowerINSERT_VECTOR_ELT(Op, DAG);
2509 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2513 return LowerByteImmed(Op, DAG);
2515 // Vector and i8 multiply:
2517 if (MVT::isVector(Op.getValueType()))
2518 return LowerVectorMUL(Op, DAG);
2519 else if (Op.getValueType() == MVT::i8)
2520 return LowerI8Math(Op, DAG, Op.getOpcode());
2522 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// f32/v4f32 division goes through the reciprocal-estimate lowering; f64 is
// not yet supported (LowerFDIVf64 is still commented out above).
2525 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2526 return LowerFDIVf32(Op, DAG);
2527 // else if (Op.getValueType() == MVT::f64)
2528 // return LowerFDIVf64(Op, DAG);
2530 assert(0 && "Calling FDIV on unsupported MVT");
2533 return LowerCTPOP(Op, DAG);
2539 //===----------------------------------------------------------------------===//
2540 // Other Lowering Code
2541 //===----------------------------------------------------------------------===//
// TargetLowering hook for custom-inserted instructions.
// NOTE(review): the return-type line and the entire body are elided from
// this listing — behavior cannot be documented from the visible lines.
2544 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2545 MachineBasicBlock *BB)
2550 //===----------------------------------------------------------------------===//
2551 // Target Optimization Hooks
2552 //===----------------------------------------------------------------------===//
// Target-specific DAG combine: for the immediate shift/rotate node kinds
// listed below, fold the two trivial identities — a zero first operand
// yields zero, and a zero shift amount yields the first operand unchanged.
// NOTE(review): the return statements inside both folds, several case
// labels, and the function's default return are elided from this listing.
2555 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2558 TargetMachine &TM = getTargetMachine();
2559 SelectionDAG &DAG = DCI.DAG;
2561 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2563 switch (N->getOpcode()) {
2566 // Look for obvious optimizations for shift left:
2567 // a) Replace 0 << V with 0
2568 // b) Replace V << 0 with V
2570 // N.B: llvm will generate an undef node if the shift amount is greater than
2571 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2574 case SPU::SHLQBIIvec:
2576 case SPU::ROTHIr16_i32:
2578 case SPU::ROTIr32_i16:
2579 case SPU::ROTQBYIvec:
2580 case SPU::ROTQBYBIvec:
2581 case SPU::ROTQBIIvec:
2582 case SPU::ROTHMIr16:
2584 case SPU::ROTQMBYIvec: {
2585 if (N0.getOpcode() == ISD::Constant) {
2586 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2587 if (C->getValue() == 0) // 0 << V -> 0.
2591 SDOperand N1 = N->getOperand(1);
2592 if (N1.getOpcode() == ISD::Constant) {
2593 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2594 if (C->getValue() == 0) // V << 0 -> V
2605 //===----------------------------------------------------------------------===//
2606 // Inline Assembly Support
2607 //===----------------------------------------------------------------------===//
2609 /// getConstraintType - Given a constraint letter, return the type of
2610 /// constraint it is for this target.
// Inline-asm constraint classification: single-letter constraints handled
// by the (elided) case labels map to C_RegisterClass; everything else
// defers to the TargetLowering base class.
2611 SPUTargetLowering::ConstraintType
2612 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2613 if (ConstraintLetter.size() == 1) {
2614 switch (ConstraintLetter[0]) {
2621 return C_RegisterClass;
2624 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (GCC RS6000-style letters) to
// an SPU register class, selecting 32- vs 64-bit integer or FP classes by
// the requested value type; unrecognized constraints defer to the base
// class.
// NOTE(review): the case labels and the `if (VT == ...)` guards preceding
// several returns are elided from this listing.
2627 std::pair<unsigned, const TargetRegisterClass*>
2628 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2629 MVT::ValueType VT) const
2631 if (Constraint.size() == 1) {
2632 // GCC RS6000 Constraint Letters
2633 switch (Constraint[0]) {
2637 return std::make_pair(0U, SPU::R64CRegisterClass);
2638 return std::make_pair(0U, SPU::R32CRegisterClass);
2641 return std::make_pair(0U, SPU::R32FPRegisterClass);
2642 else if (VT == MVT::f64)
2643 return std::make_pair(0U, SPU::R64FPRegisterClass);
2646 return std::make_pair(0U, SPU::GPRCRegisterClass);
2650 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Known-bits hook for target-specific nodes.
// NOTE(review): the return-type line, the Mask/KnownOne parameter lines and
// the entire body are elided from this listing — the visible lines show only
// part of the signature.
2654 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2656 uint64_t &KnownZero,
2658 const SelectionDAG &DAG,
2659 unsigned Depth ) const {
2664 // LowerAsmOperandForConstraint
// Inline-asm operand lowering: no SPU-specific constraint letters are
// handled yet, so everything is delegated to the TargetLowering default.
2666 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2667 char ConstraintLetter,
2668 std::vector<SDOperand> &Ops,
2669 SelectionDAG &DAG) {
2670 // Default, for the time being, to the base class handler
2671 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2674 /// isLegalAddressImmediate - Return true if the integer value can be used
2675 /// as the offset of the target addressing mode.
// Accepts offsets strictly inside the signed 18-bit range (+/-256K local
// store). NOTE(review): as written the test excludes both -(1<<18) and
// (1<<18)-1 themselves — i.e. it is exclusive at both extremes; confirm
// against the SPU d-form immediate range before tightening or widening.
2676 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2677 // SPU's addresses are 256K:
2678 return (V > -(1 << 18) && V < (1 << 18) - 1);
2681 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {