1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// Lazily-populated map from SPUISD opcode to a printable name; filled in on
// the first call to getTargetNodeName().
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
// Associates a value type with the byte offset of that type's "preferred
// slot" inside a 16-byte SPU register (used by the load/store lowering).
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
// Per-type table; its entries are elided in this view.
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear scan of valtype_map for VT. Returns the matching entry, or 0 when VT
// has no entry; the cerr output below is a diagnostic for the not-found case
// (its guard condition is elided in this view — presumably debug-only).
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an existing D-Form
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::FrameIndex
94 || Opc == ISD::JumpTable
95 || Opc == ISD::ConstantPool
96 || Opc == ISD::ExternalSymbol
97 || Opc == ISD::TargetGlobalAddress
98 || Opc == ISD::TargetGlobalTLSAddress
99 || Opc == ISD::TargetFrameIndex
100 || Opc == ISD::TargetJumpTable
101 || Opc == ISD::TargetConstantPool
102 || Opc == ISD::TargetExternalSymbol
103 || Opc == SPUISD::DFormAddr);
// Constructor: registers the SPU register classes and declares, per
// (opcode, value type), whether each DAG operation is Legal, Promote,
// Expand, or Custom-lowered for Cell SPU. Several initializer-list entries
// and loop bodies are elided in this view.
107 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
108 : TargetLowering(TM),
111 // Fold away setcc operations if possible.
114 // Use _setjmp/_longjmp instead of setjmp/longjmp.
115 setUseUnderscoreSetJmp(true);
116 setUseUnderscoreLongJmp(true);
118 // Set up the SPU's register classes:
119 // NOTE: i8 register class is not registered because we cannot determine when
120 // we need to zero or sign extend for custom-lowered loads and stores.
121 // NOTE: Ignore the previous note. For now. :-)
122 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
123 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
124 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
125 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
126 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
127 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
128 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
130 // SPU has no sign or zero extended loads for i1, i8, i16:
131 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
132 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
133 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
134 setStoreXAction(MVT::i1, Custom);
136 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
137 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
139 setStoreXAction(MVT::i8, Custom);
141 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
142 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
145 // SPU constant load actions are custom lowered:
146 setOperationAction(ISD::Constant, MVT::i64, Custom);
147 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
150 // SPU's loads and stores have to be custom lowered:
// Marks LOAD/STORE Custom for every scalar type from i1 up to (but not
// including, per the '<' bound) f128; the loop increment line is elided.
151 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
153 setOperationAction(ISD::LOAD, sctype, Custom);
154 setOperationAction(ISD::STORE, sctype, Custom);
157 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
158 // into BR_CCs. BR_CC instructions are custom selected in
160 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
162 // Expand the jumptable branches
163 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
164 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
165 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
167 // SPU has no intrinsics for these particular operations:
168 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
169 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
170 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
172 // SPU has no SREM/UREM instructions (comment previously said "PowerPC" —
172 // a copy/paste from the PPC backend)
173 setOperationAction(ISD::SREM, MVT::i32, Expand);
174 setOperationAction(ISD::UREM, MVT::i32, Expand);
175 setOperationAction(ISD::SREM, MVT::i64, Expand);
176 setOperationAction(ISD::UREM, MVT::i64, Expand);
178 // We don't support sin/cos/sqrt/fmod
179 setOperationAction(ISD::FSIN , MVT::f64, Expand);
180 setOperationAction(ISD::FCOS , MVT::f64, Expand);
181 setOperationAction(ISD::FREM , MVT::f64, Expand);
182 setOperationAction(ISD::FSIN , MVT::f32, Expand);
183 setOperationAction(ISD::FCOS , MVT::f32, Expand);
184 setOperationAction(ISD::FREM , MVT::f32, Expand);
186 // If we're enabling GP optimizations, use hardware square root
187 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
188 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
190 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
191 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
193 // SPU can do rotate right and left, so legalize it... but customize for i8
194 // because instructions don't exist.
195 setOperationAction(ISD::ROTR, MVT::i32, Legal);
196 setOperationAction(ISD::ROTR, MVT::i16, Legal);
197 setOperationAction(ISD::ROTR, MVT::i8, Custom);
198 setOperationAction(ISD::ROTL, MVT::i32, Legal);
199 setOperationAction(ISD::ROTL, MVT::i16, Legal);
200 setOperationAction(ISD::ROTL, MVT::i8, Custom);
201 // SPU has no native version of shift left/right for i8
202 setOperationAction(ISD::SHL, MVT::i8, Custom);
203 setOperationAction(ISD::SRL, MVT::i8, Custom);
204 setOperationAction(ISD::SRA, MVT::i8, Custom);
206 // Custom lower i32 multiplications
207 setOperationAction(ISD::MUL, MVT::i32, Custom);
209 // Need to custom handle (some) common i8 math ops
210 setOperationAction(ISD::SUB, MVT::i8, Custom);
211 setOperationAction(ISD::MUL, MVT::i8, Custom);
213 // SPU does not have BSWAP. It does have i32 support CTLZ.
214 // CTPOP has to be custom lowered.
215 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
216 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
218 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
219 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
223 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
224 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
226 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
228 // SPU does not have select or setcc
229 setOperationAction(ISD::SELECT, MVT::i1, Expand);
230 setOperationAction(ISD::SELECT, MVT::i8, Expand);
231 setOperationAction(ISD::SELECT, MVT::i16, Expand);
232 setOperationAction(ISD::SELECT, MVT::i32, Expand);
233 setOperationAction(ISD::SELECT, MVT::i64, Expand);
234 setOperationAction(ISD::SELECT, MVT::f32, Expand);
235 setOperationAction(ISD::SELECT, MVT::f64, Expand);
237 setOperationAction(ISD::SETCC, MVT::i1, Expand);
238 setOperationAction(ISD::SETCC, MVT::i8, Expand);
239 setOperationAction(ISD::SETCC, MVT::i16, Expand);
240 setOperationAction(ISD::SETCC, MVT::i32, Expand);
241 setOperationAction(ISD::SETCC, MVT::i64, Expand);
242 setOperationAction(ISD::SETCC, MVT::f32, Expand);
243 setOperationAction(ISD::SETCC, MVT::f64, Expand);
245 // SPU has a legal FP -> signed INT instruction
246 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
247 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
248 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
251 // FDIV on SPU requires custom lowering
252 setOperationAction(ISD::FDIV, MVT::f32, Custom);
253 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
255 // SPU has [U|S]INT_TO_FP
256 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
257 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
259 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
262 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
263 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
265 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
266 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
267 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
268 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
270 // We cannot sextinreg(i1). Expand to shifts.
271 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
273 // Support label based line numbers.
274 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
275 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
277 // We want to legalize GlobalAddress and ConstantPool nodes into the
278 // appropriate instructions to materialize the address.
279 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
280 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
282 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
283 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
284 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
286 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
288 // RET must be custom lowered, to meet ABI requirements
289 setOperationAction(ISD::RET, MVT::Other, Custom);
291 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
292 setOperationAction(ISD::VASTART , MVT::Other, Custom);
294 // Use the default implementation.
295 setOperationAction(ISD::VAARG , MVT::Other, Expand);
296 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
297 setOperationAction(ISD::VAEND , MVT::Other, Expand);
298 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
299 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
300 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
303 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): the next two calls repeat the i64 Custom settings already
// made above (original lines 247 and 262); redundant but harmless since the
// last setOperationAction for a key wins.
304 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
305 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
307 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier 'Legal' setting for
// FP_TO_UINT/i32 (original line 248) — confirm Promote is intended to win.
308 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
310 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
311 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
313 // First set operation action for all vector types to expand. Then we
314 // will selectively turn on ones that can be effectively codegen'd.
315 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
316 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
// Per-vector-type action setup; note some lines of this loop are elided.
322 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
323 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
324 // add/sub are legal for all supported vector VT's.
325 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
326 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
327 // mul has to be custom lowered.
328 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
330 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
331 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
337 // These operations need to be expanded:
338 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
339 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
344 // Custom lower build_vector, constant pool spills, insert and
345 // extract vector elements:
346 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
347 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8 overrides: these logical/mul ops need custom handling for bytes.
354 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
355 setOperationAction(ISD::AND, MVT::v16i8, Custom);
356 setOperationAction(ISD::OR, MVT::v16i8, Custom);
357 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
358 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
360 setSetCCResultType(MVT::i32);
361 setShiftAmountType(MVT::i32);
362 setSetCCResultContents(ZeroOrOneSetCCResult);
364 setStackPointerRegisterToSaveRestore(SPU::R1);
366 // We have target-specific dag combine patterns for the following nodes:
367 // e.g., setTargetDAGCombine(ISD::SUB);
369 computeRegisterProperties();
// Returns the printable name for a target-specific (SPUISD) opcode, or 0 if
// the opcode is unknown. The node_names map is built lazily on first call;
// the function's return-type line and closing braces are elided in this view.
373 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
375 if (node_names.empty()) {
376 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
377 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
378 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
379 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
380 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
381 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
382 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
383 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
384 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
385 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
386 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
387 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
388 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
390 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
394 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
395 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
396 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
397 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
398 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
399 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
400 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
401 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
402 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
403 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
404 "SPUISD::ROTBYTES_RIGHT_Z";
405 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
406 "SPUISD::ROTBYTES_RIGHT_S";
407 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
409 "SPUISD::ROTBYTES_LEFT_CHAINED";
410 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
411 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
412 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
413 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
414 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
415 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Look up the opcode; unknown opcodes yield a null pointer.
418 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
420 return ((i != node_names.end()) ? i->second : 0);
423 //===----------------------------------------------------------------------===//
424 // Calling convention code:
425 //===----------------------------------------------------------------------===//
427 #include "SPUGenCallingConv.inc"
429 //===----------------------------------------------------------------------===//
430 // LowerOperation implementation
431 //===----------------------------------------------------------------------===//
433 /// Custom lower loads for CellSPU
435 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
436 within a 16-byte block, we have to rotate to extract the requested element.
// Strategy (from the visible code): load the enclosing 16-byte chunk at an
// aligned D-Form address, then ROTBYTES_LEFT the chunk so the requested
// element lands in its type's preferred slot, then extract element 0.
// Several lines of this function are elided in this view; the review notes
// below are based only on the visible lines.
439 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
440 LoadSDNode *LN = cast<LoadSDNode>(Op);
441 SDOperand basep = LN->getBasePtr();
442 SDOperand the_chain = LN->getChain();
// NOTE(review): BasepOpc holds a node opcode, yet is declared as
// MVT::ValueType — 'unsigned' would express the intent; confirm and fix.
443 MVT::ValueType BasepOpc = basep.Val->getOpcode();
444 MVT::ValueType VT = LN->getLoadedVT();
445 MVT::ValueType OpVT = Op.Val->getValueType(0);
446 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
447 ISD::LoadExtType ExtType = LN->getExtensionType();
448 unsigned alignment = LN->getAlignment();
449 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
452 if (BasepOpc == ISD::FrameIndex) {
453 // Loading from a frame index is always properly aligned. Always.
457 // For an extending load of an i1 variable, just call it i8 (or whatever we
458 // were passed) and make it zero-extended:
461 ExtType = ISD::ZEXTLOAD;
464 switch (LN->getAddressingMode()) {
465 case ISD::UNINDEXED: {
467 SDOperand rot_op, rotamt;
472 // The vector type we really want to be when we load the 16-byte chunk
473 MVT::ValueType vecVT, opVecVT;
// Vector types holding 128 bits' worth of VT/OpVT elements.
477 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
478 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
// base + constant offset: split into base pointer and byte offset, and
// derive the rotation needed to land the element in the preferred slot.
480 if (basep.getOpcode() == ISD::ADD) {
481 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
484 && "LowerLOAD: ISD::ADD operand 1 is not constant");
486 c_offset = (int) CN->getValue();
487 c_rotamt = (int) (c_offset & 0xf);
489 // Adjust the rotation amount to ensure that the final result ends up in
490 // the preferred slot:
491 c_rotamt -= vtm->prefslot_byte;
492 ptrp = basep.getOperand(0);
495 c_rotamt = -vtm->prefslot_byte;
// Fast path: a 16-byte-aligned load needs no rotation at all.
499 if (alignment == 16) {
500 // 16-byte aligned load into preferred slot, no rotation
502 if (isMemoryOperand(ptrp))
506 // Return modified D-Form address for pointer:
507 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
508 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
510 return DAG.getLoad(VT, LN->getChain(), ptrp,
511 LN->getSrcValue(), LN->getSrcValueOffset(),
512 LN->isVolatile(), 16);
// Extending load variant of the same fast path.
514 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
515 LN->getSrcValueOffset(), OpVT,
516 LN->isVolatile(), 16);
522 // Realign the base pointer, with a D-Form address
523 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
524 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
525 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
// Load the whole 16-byte chunk as v16i8, then rotate the requested element
// into the preferred slot.
530 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
531 LN->getSrcValue(), LN->getSrcValueOffset(),
532 LN->isVolatile(), 16);
533 the_chain = rot_op.getValue(1);
534 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
536 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
541 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
542 the_chain = result.getValue(1);
// Extract the scalar (element 0) from the rotated vector.
544 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
549 scalarvts = DAG.getVTList(VT, MVT::Other);
551 scalarvts = DAG.getVTList(OpVT, MVT::Other);
554 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
558 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
559 the_chain = result.getValue(1);
561 // Handle the sign and zero-extending loads for i1 and i8:
564 if (ExtType == ISD::SEXTLOAD) {
565 NewOpC = (OpVT == MVT::i1
566 ? SPUISD::EXTRACT_I1_SEXT
567 : SPUISD::EXTRACT_I8_SEXT);
569 assert(ExtType == ISD::ZEXTLOAD);
570 NewOpC = (OpVT == MVT::i1
571 ? SPUISD::EXTRACT_I1_ZEXT
572 : SPUISD::EXTRACT_I8_ZEXT);
575 result = DAG.getNode(NewOpC, OpVT, result);
// Repackage (value, chain) as an LDRESULT node for the caller.
578 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
579 SDOperand retops[2] = { result, the_chain };
581 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
586 // Misaligned 16-byte load:
587 if (basep.getOpcode() == ISD::LOAD) {
588 LN = cast<LoadSDNode>(basep);
589 if (LN->getAlignment() == 16) {
590 // We can verify that we're really loading from a 16-byte aligned
591 // chunk. Encapsulate basep as a D-Form address and return a new
593 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
594 DAG.getConstant(0, PtrVT));
596 return DAG.getLoad(VT, LN->getChain(), basep,
597 LN->getSrcValue(), LN->getSrcValueOffset(),
598 LN->isVolatile(), 16);
600 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
601 LN->getSrcValue(), LN->getSrcValueOffset(),
602 OpVT, LN->isVolatile(), 16);
606 // Catch all other cases where we can't guarantee that we have a
607 // 16-byte aligned entity, which means resorting to an X-form
// X-form address: register + register (Lo + Hi parts added together).
610 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
611 SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
612 SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
614 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
616 SDOperand alignLoad =
617 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
618 LN->getSrcValue(), LN->getSrcValueOffset(),
619 LN->isVolatile(), 16);
// Build a shuffle (SHUFB) mask from the pointer and use it to pull the
// requested element into place, then extract element 0.
621 SDOperand insertEltOp =
622 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
624 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
627 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
629 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
631 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
632 SDOperand retops[2] = { result, the_chain };
634 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
// Pre/post-indexed addressing modes are not implemented for SPU loads.
643 case ISD::LAST_INDEXED_MODE:
644 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
646 cerr << (unsigned) LN->getAddressingMode() << "\n";
654 /// Custom lower stores for CellSPU
656 All CellSPU stores are aligned to 16-byte boundaries, so for elements
657 within a 16-byte block, we have to generate a shuffle to insert the
658 requested element into its place, then store the resulting block.
// Strategy (from the visible code): read-modify-write — load the aligned
// 16-byte chunk, SHUFB the scalar value into its slot using an INSERT_MASK,
// then store the whole chunk back. Several lines are elided in this view.
661 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
662 StoreSDNode *SN = cast<StoreSDNode>(Op);
663 SDOperand Value = SN->getValue();
664 MVT::ValueType VT = Value.getValueType();
665 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
666 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
667 SDOperand the_chain = SN->getChain();
668 //unsigned alignment = SN->getAlignment();
669 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
671 switch (SN->getAddressingMode()) {
672 case ISD::UNINDEXED: {
673 SDOperand basep = SN->getBasePtr();
677 if (basep.getOpcode() == ISD::FrameIndex) {
678 // FrameIndex nodes are always properly aligned. Really.
// base + constant offset: split into base pointer and byte offset.
682 if (basep.getOpcode() == ISD::ADD) {
683 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
685 && "LowerSTORE: ISD::ADD operand 1 is not constant");
686 offset = unsigned(CN->getValue());
687 ptrOp = basep.getOperand(0);
688 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
696 // The vector type we really want to load from the 16-byte chunk, except
697 // in the case of MVT::i1, which has to be v16i8.
// NOTE(review): vecVT/stVecVT hold MVT::ValueType values but are declared
// 'unsigned' — works via implicit conversion, but the type should match.
698 unsigned vecVT, stVecVT;
701 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
703 stVecVT = MVT::v16i8;
704 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
706 // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
707 // the actual dform addr offs($reg).
708 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
709 DAG.getConstant((offset & ~0xf), PtrVT));
711 // Create the 16-byte aligned vector load
712 SDOperand alignLoad =
713 DAG.getLoad(vecVT, the_chain, basep,
714 SN->getSrcValue(), SN->getSrcValueOffset(),
715 SN->isVolatile(), 16);
716 the_chain = alignLoad.getValue(1);
718 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
719 SDOperand theValue = SN->getValue();
723 && (theValue.getOpcode() == ISD::AssertZext
724 || theValue.getOpcode() == ISD::AssertSext)) {
725 // Drill down and get the value for zero- and sign-extended
727 theValue = theValue.getOperand(0);
// Build an insertion mask addressed by the within-chunk byte offset, then
// shuffle the scalar into the loaded chunk and store the result back.
730 SDOperand insertEltOp =
731 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
732 DAG.getNode(SPUISD::DFormAddr, PtrVT,
734 DAG.getConstant((offset & 0xf), PtrVT)));
736 result = DAG.getNode(SPUISD::SHUFB, vecVT,
737 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
739 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
741 result = DAG.getStore(the_chain, result, basep,
742 LN->getSrcValue(), LN->getSrcValueOffset(),
743 LN->isVolatile(), LN->getAlignment());
// Pre/post-indexed addressing modes are not implemented for SPU stores.
// NOTE(review): the message below says "LowerLOAD"/"LoadSDNode" but this is
// the store path — almost certainly a copy/paste; the string should read
// "LowerSTORE ... StoreSDNode". Confirm and fix.
752 case ISD::LAST_INDEXED_MODE:
753 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
755 cerr << (unsigned) SN->getAddressingMode() << "\n";
763 /// Generate the address of a constant pool entry.
// Static relocation model only: small-memory model returns the target
// constant-pool node directly (return elided in this view); large-memory
// model materializes the address as Hi/Lo halves combined with an ADD.
// Non-static relocation models hit the assert below.
765 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
766 MVT::ValueType PtrVT = Op.getValueType();
767 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
768 Constant *C = CP->getConstVal();
769 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
770 const TargetMachine &TM = DAG.getTarget();
771 SDOperand Zero = DAG.getConstant(0, PtrVT);
773 if (TM.getRelocationModel() == Reloc::Static) {
774 if (!ST->usingLargeMem()) {
775 // Just return the SDOperand with the constant pool address in it.
778 // Generate hi/lo address pair
779 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
780 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
782 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
787 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry. Mirrors LowerConstantPool:
// static relocation model only; small-memory returns the target jump-table
// node directly (return elided in this view), large-memory builds a
// Hi/Lo pair combined with ADD. Non-static models hit the assert below.
792 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
793 MVT::ValueType PtrVT = Op.getValueType();
794 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
795 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
796 SDOperand Zero = DAG.getConstant(0, PtrVT);
797 const TargetMachine &TM = DAG.getTarget();
799 if (TM.getRelocationModel() == Reloc::Static) {
800 if (!ST->usingLargeMem()) {
801 // Just return the SDOperand with the jump table address in it.
804 // Generate hi/lo address pair
805 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
806 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
808 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
813 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global. Mirrors LowerConstantPool/LowerJumpTable:
// static relocation model only; small-memory emits a local store address
// (elided in this view), large-memory builds a Hi/Lo pair combined with ADD.
// Non-static models produce the error message below.
818 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
819 MVT::ValueType PtrVT = Op.getValueType();
820 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
821 GlobalValue *GV = GSDN->getGlobal();
822 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
823 SDOperand Zero = DAG.getConstant(0, PtrVT);
824 const TargetMachine &TM = DAG.getTarget();
826 if (TM.getRelocationModel() == Reloc::Static) {
827 if (!ST->usingLargeMem()) {
828 // Generate a local store address
831 // Generate hi/lo address pair
832 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
833 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
835 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
838 cerr << "LowerGlobalAddress: Relocation model other than static not "
847 //! Custom lower i64 integer constants
849 This code inserts all of the necessary juggling that needs to occur to load
850 a 64-bit constant into a register.
// Visible path: an i64 constant is materialized by splatting it into a
// v2i64 BUILD_VECTOR and extracting element 0. Other value types fall
// through to the error message below (surrounding lines elided).
853 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
854 unsigned VT = Op.getValueType();
855 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
857 if (VT == MVT::i64) {
858 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
859 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
860 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
863 cerr << "LowerConstant: unhandled constant type "
864 << MVT::getValueTypeString(VT)
873 //! Custom lower single precision floating point constants
875 "float" immediates can be lowered as if they were unsigned 32-bit integers.
876 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
// f32: wrap the immediate in an SFPConstant pseudo-instruction.
// f64: reuse the i64 constant lowering on the double's bit pattern and
// bit-convert the result back to f64. (Surrounding lines elided.)
880 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
881 unsigned VT = Op.getValueType();
882 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
885 "LowerConstantFP: Node is not ConstantFPSDNode");
887 if (VT == MVT::f32) {
888 float targetConst = FP->getValueAPF().convertToFloat();
889 return DAG.getNode(SPUISD::SFPConstant, VT,
890 DAG.getTargetConstantFP(targetConst, VT));
891 } else if (VT == MVT::f64) {
892 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
893 return DAG.getNode(ISD::BIT_CONVERT, VT,
894 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
// Lower FORMAL_ARGUMENTS: for each incoming argument, either bind it to the
// next SPU argument register (per-type register class) or mark it for a load
// from the fixed stack slot at ArgOffset. Also sets up VarArgsFrameIndex and
// spills remaining argument registers for varargs. This function is
// truncated at the end of this view and has interior lines elided; comments
// below cover only the visible lines.
901 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
903 MachineFunction &MF = DAG.getMachineFunction();
904 MachineFrameInfo *MFI = MF.getFrameInfo();
905 MachineRegisterInfo &RegInfo = MF.getRegInfo();
906 SmallVector<SDOperand, 8> ArgValues;
907 SDOperand Root = Op.getOperand(0);
908 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
910 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
911 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
913 unsigned ArgOffset = SPUFrameInfo::minStackSize();
914 unsigned ArgRegIdx = 0;
915 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
917 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
919 // Add DAG nodes to load the arguments or copy them out of registers.
920 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
922 bool needsLoad = false;
923 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
924 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
// Unhandled argument types are reported (switch cases elided in this view;
// each visible case binds the arg to a virtual register of the matching
// register class when a physical argument register is still available).
928 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
929 << MVT::getValueTypeString(ObjectVT)
934 if (!isVarArg && ArgRegIdx < NumArgRegs) {
935 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
936 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
937 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
944 if (!isVarArg && ArgRegIdx < NumArgRegs) {
945 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
946 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
947 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
954 if (!isVarArg && ArgRegIdx < NumArgRegs) {
955 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
956 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
957 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
964 if (!isVarArg && ArgRegIdx < NumArgRegs) {
965 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
966 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
967 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
974 if (!isVarArg && ArgRegIdx < NumArgRegs) {
975 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
976 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
977 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
984 if (!isVarArg && ArgRegIdx < NumArgRegs) {
985 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
986 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
987 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1009 // We need to load the argument to a virtual register if we determined above
1010 // that we ran out of physical registers of the appropriate type
1012 // If the argument is actually used, emit a load from the right stack
1014 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1015 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1016 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1017 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1019 // Don't emit a dead load.
1020 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1023 ArgOffset += StackSlotSize;
1026 ArgValues.push_back(ArgVal);
1029 // If the function takes variable number of arguments, make a frame index for
1030 // the start of the first vararg value... for expansion of llvm.va_start.
1032 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1034 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1035 // If this function is vararg, store any remaining integer argument regs to
1036 // their spots on the stack so that they may be loaded by deferencing the
1037 // result of va_next.
1038 SmallVector<SDOperand, 8> MemOps;
1039 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1040 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1041 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1042 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1043 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1044 MemOps.push_back(Store);
1045 // Increment the address by four for the next argument to store
1046 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1047 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1049 if (!MemOps.empty())
1050 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1053 ArgValues.push_back(Root);
1055 // Return the new list of results.
1056 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1057 Op.Val->value_end());
1058 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1061 /// isLSAAddress - Return the immediate to use if the specified
1062 /// value is representable as a LSA address.
// Returns the constant node holding (Addr >> 2) when Addr is a constant that
// is 4-byte aligned and fits in a sign-extended 18-bit field; 0 otherwise.
1063 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1064 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// NOTE(review): original lines 1065-1066 are elided from this listing --
// presumably the null check on C (return 0 if Op is not a constant); verify.
1067 int Addr = C->getValue();
1068 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1069 (Addr << 14 >> 14) != Addr)
1070 return 0; // Top 14 bits have to be sext of immediate.
// Word address: drop the two implicit zero bits.
1072 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL for the Cell SPU: marshal arguments into the SPU argument
// registers and overflow stack slots, emit CALLSEQ_START/END, resolve the
// callee (BRSL for defined globals via PCRelAddr, BRASL for declarations,
// or a munged absolute LSA address), then copy return values out of R3/R4.
// NOTE(review): this listing is elided (embedded line numbers jump), so the
// switch case labels, closing braces and some declarations (e.g. InFlag) are
// not visible; comments describe only what is shown.
1077 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1078 SDOperand Chain = Op.getOperand(0);
// Operands 2/3 carry the isVarArg / isTailCall flags as constants.
1080 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1081 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1083 SDOperand Callee = Op.getOperand(4);
// Arguments start at operand 5 and come in (value, flags) pairs.
1084 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1085 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1086 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1087 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1089 // Handy pointer type
1090 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1092 // Accumulate how many bytes are to be pushed on the stack, including the
1093 // linkage area, and parameter passing area. According to the SPU ABI,
1094 // we minimally need space for [LR] and [SP]
1095 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1097 // Set up a copy of the stack pointer for use loading and storing any
1098 // arguments that may not fit in the registers available for argument
1100 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1102 // Figure out which arguments are going to go in registers, and which in
1104 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1105 unsigned ArgRegIdx = 0;
1107 // Keep track of registers passing arguments
1108 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1109 // And the arguments passed on the stack
1110 SmallVector<SDOperand, 8> MemOpChains;
1112 for (unsigned i = 0; i != NumOps; ++i) {
1113 SDOperand Arg = Op.getOperand(5+2*i);
1115 // PtrOff will be used to store the current argument to the stack if a
1116 // register cannot be found for it.
1117 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1118 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1120 switch (Arg.getValueType()) {
1121 default: assert(0 && "Unexpected ValueType for argument!");
// Each (elided) case: pass in the next argument register if one remains,
// otherwise store to the overflow slot and bump the stack offset.
1125 if (ArgRegIdx != NumArgRegs) {
1126 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1128 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1129 ArgOffset += StackSlotSize;
1134 if (ArgRegIdx != NumArgRegs) {
1135 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1138 ArgOffset += StackSlotSize;
1145 if (ArgRegIdx != NumArgRegs) {
1146 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1148 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1149 ArgOffset += StackSlotSize;
1155 // Update number of stack bytes actually used, insert a call sequence start
1156 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1157 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1159 if (!MemOpChains.empty()) {
1160 // Adjust the stack pointer for the stack arguments.
1161 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1162 &MemOpChains[0], MemOpChains.size());
1165 // Build a sequence of copy-to-reg nodes chained together with token chain
1166 // and flag operands which copy the outgoing args into the appropriate regs.
1168 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1169 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1171 InFlag = Chain.getValue(1);
1174 std::vector<MVT::ValueType> NodeTys;
1175 NodeTys.push_back(MVT::Other); // Returns a chain
1176 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1178 SmallVector<SDOperand, 8> Ops;
1179 unsigned CallOpc = SPUISD::CALL;
1181 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1182 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1183 // node so that legalize doesn't hack it.
1184 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1185 GlobalValue *GV = G->getGlobal();
1186 unsigned CalleeVT = Callee.getValueType();
1188 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1189 // style calls, otherwise, external symbols are BRASL calls.
1191 // This may be an unsafe assumption for JIT and really large compilation
1193 if (GV->isDeclaration()) {
1194 Callee = DAG.getGlobalAddress(GV, CalleeVT);
// Defined global: wrap the target address in a PC-relative address node.
1196 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1197 DAG.getTargetGlobalAddress(GV, CalleeVT),
1198 DAG.getConstant(0, PtrVT));
1200 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1201 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1202 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1203 // If this is an absolute destination address that appears to be a legal
1204 // local store address, use the munged value.
1205 Callee = SDOperand(Dest, 0);
1207 Ops.push_back(Chain);
1208 Ops.push_back(Callee);
1210 // Add argument registers to the end of the list so that they are known live
1212 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1213 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1214 RegsToPass[i].second.getValueType()));
1217 Ops.push_back(InFlag);
1218 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1219 InFlag = Chain.getValue(1);
1221 SDOperand ResultVals[3];
1222 unsigned NumResults = 0;
1225 // If the call has results, copy the values out of the ret val registers.
1226 switch (Op.Val->getValueType(0)) {
1227 default: assert(0 && "Unexpected ret value!");
1228 case MVT::Other: break;
// Elided case (presumably MVT::i32): a two-register result comes back in
// R4:R3, a single i32 result in R3 alone -- TODO confirm against full source.
1230 if (Op.Val->getValueType(1) == MVT::i32) {
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1232 ResultVals[0] = Chain.getValue(0);
1233 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1234 Chain.getValue(2)).getValue(1);
1235 ResultVals[1] = Chain.getValue(0);
1237 NodeTys.push_back(MVT::i32);
1239 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1240 ResultVals[0] = Chain.getValue(0);
1243 NodeTys.push_back(MVT::i32);
// i64 results come back in R3.
1246 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1247 ResultVals[0] = Chain.getValue(0);
1249 NodeTys.push_back(MVT::i64);
// Remaining (elided) cases copy the result out of R3 with its own type.
1253 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1254 InFlag).getValue(1);
1255 ResultVals[0] = Chain.getValue(0);
1257 NodeTys.push_back(Op.Val->getValueType(0));
1264 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1265 InFlag).getValue(1);
1266 ResultVals[0] = Chain.getValue(0);
1268 NodeTys.push_back(Op.Val->getValueType(0));
// Close the call sequence with the same byte count used at CALLSEQ_START.
1272 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1273 DAG.getConstant(NumStackBytes, PtrVT));
1274 NodeTys.push_back(MVT::Other);
1276 // If the function returns void, just return the chain.
1277 if (NumResults == 0)
1280 // Otherwise, merge everything together with a MERGE_VALUES node.
1281 ResultVals[NumResults++] = Chain;
1282 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1283 ResultVals, NumResults);
1284 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the return values through the RetCC_SPU calling
// convention, copy each into its assigned physical register (glued with a
// flag so the copies stay with the return), and emit SPUISD::RET_FLAG.
// NOTE(review): listing is elided -- the declaration of Flag and the branch
// selecting between the two RET_FLAG forms are not visible here.
1288 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1289 SmallVector<CCValAssign, 16> RVLocs;
1290 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1291 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1292 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1293 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1295 // If this is the first return lowered for this function, add the regs to the
1296 // liveout set for the function.
1297 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1298 for (unsigned i = 0; i != RVLocs.size(); ++i)
1299 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1302 SDOperand Chain = Op.getOperand(0);
1305 // Copy the result values into the output registers.
1306 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1307 CCValAssign &VA = RVLocs[i];
1308 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, val0, sign0, val1, sign1, ...): value i is at
// operand i*2+1.
1309 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1310 Flag = Chain.getValue(1);
// With a flag (some value was copied) glue it to the return; otherwise
// return on the bare chain.
1314 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1316 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1320 //===----------------------------------------------------------------------===//
1321 // Vector related lowering:
1322 //===----------------------------------------------------------------------===//
// Return the ConstantSDNode if every non-undef element of the BUILD_VECTOR N
// is the same constant (a constant splat); return 0 otherwise, including the
// all-UNDEF case.
// NOTE(review): the early-continue on the first assignment and the return of
// CN are on elided lines; behavior inferred from the visible structure.
1324 static ConstantSDNode *
1325 getVecImm(SDNode *N) {
1326 SDOperand OpVal(0, 0);
1328 // Check to see if this buildvec has a single non-undef value in its elements.
1329 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1330 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// First non-undef element seen becomes the candidate splat value.
1332 OpVal = N->getOperand(i);
1333 else if (OpVal != N->getOperand(i))
1337 if (OpVal.Val != 0) {
1338 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1343 return 0; // All UNDEF: use implicit def.; not Constant node
1346 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1347 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// (comment above fixed: function is named get_vec_u18imm, not get_vec_i18imm)
// Returns the constant wrapped in ValueType on success; the fall-through
// failure return (empty SDOperand) is on an elided line.
1349 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1350 MVT::ValueType ValueType) {
1351 if (ConstantSDNode *CN = getVecImm(N)) {
1352 uint64_t Value = CN->getValue();
// 0x3ffff == (1 << 18) - 1: largest unsigned 18-bit immediate.
1353 if (Value <= 0x3ffff)
1354 return DAG.getConstant(Value, ValueType);
1360 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1361 /// and the value fits into a signed 16-bit constant, and if so, return the
// For each supported element type the splat value is sign-extended from 16
// bits and compared against the original; equality means it fits in a signed
// 16-bit immediate.  Failure return is on an elided line.
1363 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1364 MVT::ValueType ValueType) {
1365 if (ConstantSDNode *CN = getVecImm(N)) {
1366 if (ValueType == MVT::i32) {
1367 int Value = (int) CN->getValue();
// Sign-extend the low 16 bits back to 32 and compare.
1368 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1370 if (Value == SExtValue)
1371 return DAG.getConstant(Value, ValueType);
1372 } else if (ValueType == MVT::i16) {
1373 short Value = (short) CN->getValue();
1374 int SExtValue = ((int) Value << 16) >> 16;
1376 if (Value == (short) SExtValue)
1377 return DAG.getConstant(Value, ValueType);
1378 } else if (ValueType == MVT::i64) {
1379 int64_t Value = CN->getValue();
// Same check widened to 64 bits.
1380 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1382 if (Value == SExtValue)
1383 return DAG.getConstant(Value, ValueType);
1390 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1391 /// and the value fits into a signed 10-bit constant, and if so, return the
// Uses the isS10Constant helper (defined elsewhere in the backend) for the
// signed 10-bit range check; failure return is on an elided line.
1393 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1394 MVT::ValueType ValueType) {
1395 if (ConstantSDNode *CN = getVecImm(N)) {
1396 int Value = (int) CN->getValue();
1397 if ((ValueType == MVT::i32 && isS10Constant(Value))
1398 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1399 return DAG.getConstant(Value, ValueType);
1405 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1406 /// and the value fits into a signed 8-bit constant, and if so, return the
1409 /// @note: The incoming vector is v16i8 because that's the only way we can load
1410 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// Failure return is on an elided line.
1412 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1413 MVT::ValueType ValueType) {
1414 if (ConstantSDNode *CN = getVecImm(N)) {
1415 int Value = (int) CN->getValue();
1416 if (ValueType == MVT::i16
1417 && Value <= 0xffff /* truncated from uint64_t */
// NOTE(review): "(short) Value >> 8" is an arithmetic shift, so for values
// with bit 15 set the high byte comes back sign-extended; the intended
// high-byte/low-byte equality test may want an unsigned shift or an extra
// 0xff mask on the left-hand side -- verify against the full source.
1418 && ((short) Value >> 8) == ((short) Value & 0xff))
1419 return DAG.getConstant(Value & 0xff, ValueType);
1420 else if (ValueType == MVT::i8
1421 && (Value & 0xff) == Value)
1422 return DAG.getConstant(Value, ValueType);
1428 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1429 /// and the value fits into a signed 16-bit constant, and if so, return the
// Matches splats whose only nonzero bits are in the upper halfword(s), i.e.
// values loadable by ILHU (immediate-load-halfword-upper).  Returns the
// value shifted right 16; failure return is on an elided line.
1431 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1432 MVT::ValueType ValueType) {
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 uint64_t Value = CN->getValue();
1435 if ((ValueType == MVT::i32
1436 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
// NOTE(review): the i64 arm masks with 32-bit 0xffff0000, so it only accepts
// values whose upper 32 bits are zero -- confirm this is the intended ILHU
// pattern for v2i64 rather than a missing 0xffff0000ffff0000ULL-style mask.
1437 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1438 return DAG.getConstant(Value >> 16, ValueType);
1444 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Any constant splat qualifies; the value is returned as an i32 constant.
// Failure return (non-splat vector) is on an elided line.
1445 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1446 if (ConstantSDNode *CN = getVecImm(N)) {
1447 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1453 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
// (comment above fixed: this is get_v2i64_imm, not get_v4i32_imm)
// Failure return (non-splat vector) is on an elided line.
1454 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1455 if (ConstantSDNode *CN = getVecImm(N)) {
// NOTE(review): the (unsigned) cast truncates the splat value to 32 bits
// before building the i64 constant, discarding the upper half -- looks like
// it should cast to uint64_t (or not cast at all); verify upstream.
1456 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1462 // If this is a vector of constants or undefs, get the bits. A bit in
1463 // UndefBits is set if the corresponding element of the vector is an
1464 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1465 // zero. Return true if this is not an array of constants, false if it is.
// Packs the 128-bit BUILD_VECTOR BV into two uint64_t words (VectorBits) plus
// a parallel undef mask (UndefBits).  Note the inverted return convention:
// true means failure (a non-constant element was found).
1467 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1468 uint64_t UndefBits[2]) {
1469 // Start with zero'd results.
1470 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1472 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1473 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1474 SDOperand OpVal = BV->getOperand(i);
// Which uint64_t half this element lands in, and which sub-slot within it.
// NOTE(review): comment says "upper 128 bits" but PartNo selects the upper
// 64-bit half of a 128-bit vector.
1476 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1477 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1479 uint64_t EltBits = 0;
1480 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for this element's bit range in UndefBits.
1481 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1482 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1485 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1486 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants are reinterpreted as their raw bit patterns.
1487 const APFloat &apf = CN->getValueAPF();
1488 EltBits = (CN->getValueType(0) == MVT::f32
1489 ? FloatToBits(apf.convertToFloat())
1490 : DoubleToBits(apf.convertToDouble()));
1492 // Nonconstant element.
1496 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1499 //printf("%llx %llx %llx %llx\n",
1500 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1504 /// If this is a splat (repetition) of a value across the whole vector, return
1505 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1506 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1507 /// SplatSize = 1 byte.
// Successively folds the 128-bit value in half (64 -> 32 -> 16 -> 8 bits),
// at each level checking top-vs-bottom equality modulo undef bits, and stops
// when the requested minimum granularity (MinSplatBits) is reached.
// NOTE(review): elided lines hide the MinSplatBits parameter declaration,
// the SplatBits/SplatSize assignments at the 16/32-bit levels, and the
// "return true" exits -- comments describe only the visible folding logic.
1508 static bool isConstantSplat(const uint64_t Bits128[2],
1509 const uint64_t Undef128[2],
1511 uint64_t &SplatBits, uint64_t &SplatUndef,
1513 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1514 // the same as the lower 64-bits, ignoring undefs.
// Pre-fold each level: OR the data (undef slots contribute 0), AND the
// undef masks (a slot is undef only if undef in both halves).
1515 uint64_t Bits64 = Bits128[0] | Bits128[1];
1516 uint64_t Undef64 = Undef128[0] & Undef128[1];
1517 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1518 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1519 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1520 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1522 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1523 if (MinSplatBits < 64) {
1525 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1527 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1528 if (MinSplatBits < 32) {
1530 // If the top 16-bits are different than the lower 16-bits, ignoring
1531 // undefs, we have an i32 splat.
1532 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1533 if (MinSplatBits < 16) {
1534 // If the top 8-bits are different than the lower 8-bits, ignoring
1535 // undefs, we have an i16 splat.
1536 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1537 // Otherwise, we have an 8-bit splat.
1538 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1539 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1545 SplatUndef = Undef16;
1552 SplatUndef = Undef32;
// 128-bit case: both halves matched but MinSplatBits demanded >= 64, so the
// splat is one full 64-bit half.
1558 SplatBits = Bits128[0];
1559 SplatUndef = Undef128[0];
1565 return false; // Can't be a splat if two pieces don't match.
1568 // If this is a case we can't handle, return null and let the default
1569 // expansion code take care of it. If we CAN select this case, and if it
1570 // selects to a single instruction, return Op. Otherwise, if we can codegen
1571 // this case more efficiently than a constant pool load, lower it to the
1572 // sequence of ops that should be used.
// Lowers constant-splat BUILD_VECTORs per element type; non-splat vectors
// fall back to default expansion.  NOTE(review): this listing is elided --
// the switch head over VT, the case labels, SplatSize declaration and
// several closing braces are not visible; comments describe the visible
// per-type arms only.
1573 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1574 MVT::ValueType VT = Op.getValueType();
1575 // If this is a vector of constants or undefs, get the bits. A bit in
1576 // UndefBits is set if the corresponding element of the vector is an
1577 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1579 uint64_t VectorBits[2];
1580 uint64_t UndefBits[2];
1581 uint64_t SplatBits, SplatUndef;
// GetConstantBuildVectorBits returns true on failure (see its comment).
1583 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1584 || !isConstantSplat(VectorBits, UndefBits,
1585 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1586 SplatBits, SplatUndef, SplatSize))
1587 return SDOperand(); // Not a constant vector, not a splat.
// v4f32 arm: build a v4i32 splat of the raw bits and bitcast.
1592 uint32_t Value32 = SplatBits;
1593 assert(SplatSize == 4
1594 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1597 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1598 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 arm: same trick through v2i64.
1602 uint64_t f64val = SplatBits;
1603 assert(SplatSize == 8
1604 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1605 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1607 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1608 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 arm: SPU has no 8-bit immediates here, so widen to a v8i16 splat of
// the byte duplicated into both halves, then bitcast back.
1612 // 8-bit constants have to be expanded to 16-bits
1613 unsigned short Value16 = SplatBits | (SplatBits << 8);
1615 for (int i = 0; i < 8; ++i)
1616 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1617 return DAG.getNode(ISD::BIT_CONVERT, VT,
1618 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 arm: a 1-byte splat is duplicated into both bytes of the halfword.
1621 unsigned short Value16;
1623 Value16 = (unsigned short) (SplatBits & 0xffff);
1625 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1626 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1628 for (int i = 0; i < 8; ++i) Ops[i] = T;
1629 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 arm: direct 4-way splat.
1632 unsigned int Value = SplatBits;
1633 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1634 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 arm: split the splat into upper/lower 32-bit words and synthesize
// the vector via SHUFB over two v4i32 splats (elided branches handle the
// all-zero and other easy cases).
1637 uint64_t val = SplatBits;
1638 uint32_t upper = uint32_t(val >> 32);
1639 uint32_t lower = uint32_t(val);
1644 SmallVector<SDOperand, 16> ShufBytes;
1646 bool upper_special, lower_special;
1648 // NOTE: This code creates common-case shuffle masks that can be easily
1649 // detected as common expressions. It is not attempting to create highly
1650 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1652 // Detect if the upper or lower half is a special shuffle mask pattern:
1653 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1654 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1656 // Create lower vector if not a special pattern
1657 if (!lower_special) {
1658 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1659 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1660 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1661 LO32C, LO32C, LO32C, LO32C));
1664 // Create upper vector if not a special pattern
1665 if (!upper_special) {
1666 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1667 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1668 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1669 HI32C, HI32C, HI32C, HI32C));
1672 // If either upper or lower are special, then the two input operands are
1673 // the same (basically, one of them is a "don't care")
1678 if (lower_special && upper_special) {
1679 // Unhappy situation... both upper and lower are special, so punt with
1680 // a target constant:
1681 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1682 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control word: special halves are encoded with the
// shufb magic selector bytes (0x80 = zero, 0xc0 = 0xff, 0xe0 = 0x80), other
// bytes select straight from the source vectors.
1686 for (int i = 0; i < 4; ++i) {
1687 for (int j = 0; j < 4; ++j) {
1689 bool process_upper, process_lower;
1692 process_upper = (upper_special && (i & 1) == 0);
1693 process_lower = (lower_special && (i & 1) == 1);
1695 if (process_upper || process_lower) {
1696 if ((process_upper && upper == 0)
1697 || (process_lower && lower == 0))
1699 else if ((process_upper && upper == 0xffffffff)
1700 || (process_lower && lower == 0xffffffff))
1702 else if ((process_upper && upper == 0x80000000)
1703 || (process_lower && lower == 0x80000000))
1704 val = (j == 0 ? 0xe0 : 0x80);
1706 val = i * 4 + j + ((i & 1) * 16);
1708 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1712 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1713 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1714 &ShufBytes[0], ShufBytes.size()));
1716 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1717 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1718 return DAG.getNode(ISD::BIT_CONVERT, VT,
1719 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1720 Zero, Zero, Zero, Zero));
1728 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1730 /// permutation vector, V3, is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result is going to eventually invoke SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1736 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1737 /// control word for byte/halfword/word insertion. This takes care of a single
1738 /// element move from V2 into V1.
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): listing is elided -- the V2EltIdx0 assignments per element
// type and the declarations of SrcElt/V2Elt are not visible here.
1741 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1742 SDOperand V1 = Op.getOperand(0);
1743 SDOperand V2 = Op.getOperand(1);
1744 SDOperand PermMask = Op.getOperand(2);
// Shuffling with undef is a shuffle of V1 with itself.
1746 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1748 // If we have a single element being moved from V1 to V2, this can be handled
1749 // using the C*[DX] compute mask instructions, but the vector elements have
1750 // to be monotonically increasing with one exception element.
1751 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1752 unsigned EltsFromV2 = 0;
// V2EltIdx0 = index of V2's element 0 in the combined mask numbering
// (set per element type in the elided branches below).
1754 unsigned V2EltIdx0 = 0;
1755 unsigned CurrElt = 0;
1756 bool monotonic = true;
1757 if (EltVT == MVT::i8)
1759 else if (EltVT == MVT::i16)
1761 else if (EltVT == MVT::i32)
1764 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; bail out of the "monotonic + single V2 element" detection
// as soon as either condition fails.
1766 for (unsigned i = 0, e = PermMask.getNumOperands();
1767 EltsFromV2 <= 1 && monotonic && i != e;
1770 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1773 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1775 if (SrcElt >= V2EltIdx0) {
// Element taken from V2: remember its byte position for INSERT_MASK.
// NOTE(review): "(V2EltIdx0 - SrcElt) << 2" is negative whenever
// SrcElt > V2EltIdx0 before shifting -- looks suspicious; confirm the
// intended operand order against the full source.
1777 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1778 } else if (CurrElt != SrcElt) {
1785 if (EltsFromV2 == 1 && monotonic) {
1786 // Compute mask and shuffle
1787 MachineFunction &MF = DAG.getMachineFunction();
1788 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1789 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1790 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1791 // Initialize temporary register to 0
1792 SDOperand InitTempReg =
1793 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1794 // Copy register's contents as index in INSERT_MASK:
1795 SDOperand ShufMaskOp =
1796 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1797 DAG.getTargetConstant(V2Elt, MVT::i32),
1798 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1799 // Use shuffle mask in SHUFB synthetic instruction:
1800 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
// General case: expand the element-granular mask into a byte-granular v16i8
// shuffle mask and emit SHUFB directly.
1802 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1803 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1805 SmallVector<SDOperand, 16> ResultMask;
1806 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1808 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1811 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1813 for (unsigned j = 0; j != BytesPerElement; ++j) {
1814 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1819 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1820 &ResultMask[0], ResultMask.size());
1821 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of
// n copies of itself (later simplified to a vector load); any other scalar
// is promoted into a vector register with SPUISD::PROMOTE_SCALAR.
// NOTE(review): listing is elided -- the declarations of n_copies/VT and the
// case labels of the second switch are not visible here.
1825 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1826 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1828 if (Op0.Val->getOpcode() == ISD::Constant) {
1829 // For a constant, build the appropriate constant vector, which will
1830 // eventually simplify to a vector register load.
1832 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1833 SmallVector<SDOperand, 16> ConstVecValues;
1837 // Create a constant vector:
1838 switch (Op.getValueType()) {
1839 default: assert(0 && "Unexpected constant value type in "
1840 "LowerSCALAR_TO_VECTOR");
1841 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1842 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1843 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1844 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1845 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1846 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element.
1849 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1850 for (size_t j = 0; j < n_copies; ++j)
1851 ConstVecValues.push_back(CValue);
1853 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1854 &ConstVecValues[0], ConstVecValues.size());
1856 // Otherwise, copy the value from one register to another:
1857 switch (Op0.getValueType()) {
1858 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// All supported scalar types funnel into the same PROMOTE_SCALAR node.
1865 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1872 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1873 switch (Op.getValueType()) {
1875 SDOperand rA = Op.getOperand(0);
1876 SDOperand rB = Op.getOperand(1);
1877 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1878 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1879 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1880 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1882 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1886 // Multiply two v8i16 vectors (pipeline friendly version):
1887 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1888 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1889 // c) Use SELB to select upper and lower halves from the intermediate results
1891 // NOTE: We really want to move the FSMBI to earlier to actually get the
1892 // dual-issue. This code does manage to do this, even if it's a little on
1895 MachineFunction &MF = DAG.getMachineFunction();
1896 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1897 SDOperand Chain = Op.getOperand(0);
1898 SDOperand rA = Op.getOperand(0);
1899 SDOperand rB = Op.getOperand(1);
1900 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1901 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1904 DAG.getCopyToReg(Chain, FSMBIreg,
1905 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1906 DAG.getConstant(0xcccc, MVT::i32)));
1909 DAG.getCopyToReg(FSMBOp, HiProdReg,
1910 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1912 SDOperand HHProd_v4i32 =
1913 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1914 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1916 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1917 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1918 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1919 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1921 DAG.getConstant(16, MVT::i16))),
1922 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1925 // This M00sE is N@stI! (apologies to Monty Python)
1927 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1928 // is to break it all apart, sign extend, and reassemble the various
1929 // intermediate products.
1931 MachineFunction &MF = DAG.getMachineFunction();
1932 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1933 SDOperand Chain = Op.getOperand(0);
1934 SDOperand rA = Op.getOperand(0);
1935 SDOperand rB = Op.getOperand(1);
1936 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1937 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1939 unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1940 unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1941 unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1944 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1945 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1946 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1948 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1950 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1953 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1954 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1956 SDOperand FSMBdef_2222 =
1957 DAG.getCopyToReg(Chain, FSMBreg_2222,
1958 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1959 DAG.getConstant(0x2222, MVT::i32)));
1961 SDOperand FSMBuse_2222 =
1962 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1964 SDOperand LoProd_1 =
1965 DAG.getCopyToReg(Chain, LoProd_reg,
1966 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1969 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1972 DAG.getNode(ISD::AND, MVT::v4i32,
1973 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1974 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1975 LoProdMask, LoProdMask,
1976 LoProdMask, LoProdMask));
1979 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1983 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1987 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1991 SDOperand HHProd_1 =
1992 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1993 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1994 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1995 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1996 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1999 DAG.getCopyToReg(Chain, HiProd_reg,
2000 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2002 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2006 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2007 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2009 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2010 DAG.getNode(ISD::OR, MVT::v4i32,
2015 cerr << "CellSPU: Unknown vector multiplication, got "
2016 << MVT::getValueTypeString(Op.getValueType())
//! Lower FDIV (f32 / v4f32) via reciprocal estimate plus one refinement.
/*!
  The SPU has no divide instruction.  Visible dataflow: BRcpl =
  fi(B, frest(B)) approximates 1/B; AxBRcpl = A * BRcpl is the rough
  quotient; the final result is AxBRcpl + BRcpl * (A - B * AxBRcpl),
  i.e. one Newton-Raphson-style correction of the quotient.
  Intermediates are pinned in virtual registers (scalar FP regs for f32,
  vector regs otherwise) and threaded through CopyToReg/CopyFromReg.
 */
2025 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2026 MachineFunction &MF = DAG.getMachineFunction();
2027 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2029 SDOperand A = Op.getOperand(0);
2030 SDOperand B = Op.getOperand(1);
2031 unsigned VT = Op.getValueType();
2033 unsigned VRegBR, VRegC;
// Scalar f32 uses the 32-bit FP register class; the vector form uses the
// 128-bit vector register class.
2035 if (VT == MVT::f32) {
2036 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2037 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2039 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2040 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2042 // TODO: make sure we're feeding FPInterp the right arguments
2043 // Right now: fi B, frest(B)
// BRcpl approximates 1/B:
2046 // (Floating Interpolate (FP Reciprocal Estimate B))
2048 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2049 DAG.getNode(SPUISD::FPInterp, VT, B,
2050 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2052 // Computes A * BRcpl and stores in a temporary register
2054 DAG.getCopyToReg(BRcpl, VRegC,
2055 DAG.getNode(ISD::FMUL, VT, A,
2056 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2057 // What's the Chain variable do? It's magic!
2058 // TODO: set Chain = Op(0).getEntryNode()
// Refinement step: AxBRcpl + BRcpl * (A - B * AxBRcpl).
2060 return DAG.getNode(ISD::FADD, VT,
2061 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2062 DAG.getNode(ISD::FMUL, VT,
2063 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2064 DAG.getNode(ISD::FSUB, VT, A,
2065 DAG.getNode(ISD::FMUL, VT, B,
2066 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Lower EXTRACT_VECTOR_ELT with a constant index.
/*!
  The requested element is moved into the value type's "preferred slot"
  by a SHUFB whose control mask is built below, then read out with
  SPUISD::EXTRACT_ELT0.  Element 0 of an i32/i64 vector already occupies
  the preferred slot and needs no shuffle.
 */
2069 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2070 unsigned VT = Op.getValueType();
2071 SDOperand N = Op.getOperand(0);
2072 SDOperand Elt = Op.getOperand(1);
2073 SDOperand ShufMask[16];
2074 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2076 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2078 int EltNo = (int) C->getValue();
// Bounds-check the constant index against the element count for VT.
// NOTE(review): the i32/i64 messages say "slot > 4"/"slot > 2" but the
// conditions reject >= 4 / >= 2, so the text overstates the limit by one.
2081 if (VT == MVT::i8 && EltNo >= 16)
2082 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2083 else if (VT == MVT::i16 && EltNo >= 8)
2084 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2085 else if (VT == MVT::i32 && EltNo >= 4)
2086 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2087 else if (VT == MVT::i64 && EltNo >= 2)
2088 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2090 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2091 // i32 and i64: Element 0 is the preferred slot
2092 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2095 // Need to generate shuffle mask and extract:
2096 int prefslot_begin = -1, prefslot_end = -1;
// Byte offset of the requested element within the 16-byte register:
2097 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte range for each scalar width:
2101 prefslot_begin = prefslot_end = 3;
2105 prefslot_begin = 2; prefslot_end = 3;
2109 prefslot_begin = 0; prefslot_end = 3;
2113 prefslot_begin = 0; prefslot_end = 7;
2118 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2119 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the SHUFB control bytes: positions up to the end of the preferred
// slot either zero-fill or select the element's bytes; positions past it
// repeat the established pattern (their contents are don't-care).
2121 for (int i = 0; i < 16; ++i) {
2122 // zero fill uppper part of preferred slot, don't care about the
2124 unsigned int mask_val;
2126 if (i <= prefslot_end) {
2128 ((i < prefslot_begin)
2130 : elt_byte + (i - prefslot_begin));
2132 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2134 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2137 SDOperand ShufMaskVec =
2138 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2140 sizeof(ShufMask) / sizeof(ShufMask[0]));
2142 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2143 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2144 N, N, ShufMaskVec));
//! Lower INSERT_VECTOR_ELT with a constant index.
/*!
  Builds an insertion control mask (SPUISD::INSERT_MASK) from an address
  formed from $2 (always 16-byte aligned, per the note below) plus an
  offset derived from the constant element index, then uses SHUFB to merge
  the scalar (wrapped by SCALAR_TO_VECTOR) into the source vector.
 */
2148 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2149 SDOperand VecOp = Op.getOperand(0);
2150 SDOperand ValOp = Op.getOperand(1);
2151 SDOperand IdxOp = Op.getOperand(2);
2152 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> asserts internally and never returns null, so the
// assert below cannot fire; dyn_cast<> would be the checked form.
2154 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2155 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2157 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2158 // Use $2 because it's always 16-byte aligned and it's available:
2159 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2162 DAG.getNode(SPUISD::SHUFB, VT,
2163 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2165 DAG.getNode(SPUISD::INSERT_MASK, VT,
2166 DAG.getNode(ISD::ADD, PtrVT,
2168 DAG.getConstant(CN->getValue(),
//! Lower i8 arithmetic by widening to i16.
/*!
  The SPU has no 8-bit forms of these operations, so each operand is
  promoted to i16 (sign- or zero-extended as the operation requires;
  constants are re-materialized directly at i16), the operation is
  performed at i16, and the result is truncated back to i8.
 */
2174 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2175 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2177 assert(Op.getValueType() == MVT::i8);
2180 assert(0 && "Unhandled i8 math operator");
2184 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2186 SDOperand N1 = Op.getOperand(1);
// Sign-extend both operands (or rebuild constants at i16):
2187 N0 = (N0.getOpcode() != ISD::Constant
2188 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2189 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2190 N1 = (N1.getOpcode() != ISD::Constant
2191 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2192 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2193 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2194 DAG.getNode(Opc, MVT::i16, N0, N1));
2198 SDOperand N1 = Op.getOperand(1);
2200 N0 = (N0.getOpcode() != ISD::Constant
2201 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2202 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
// The second operand may already be wider than i16; bring it to i16
// either way:
2203 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2204 N1 = (N1.getOpcode() != ISD::Constant
2205 ? DAG.getNode(N1Opc, MVT::i16, N1)
2206 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
// Replicate the byte into both halves (N0 | N0 << 8) so bits leaving the
// low byte re-enter from the duplicated copy -- 8-bit rotate-style
// wrap-around.  TODO confirm against the (elided) case label.
2207 SDOperand ExpandArg =
2208 DAG.getNode(ISD::OR, MVT::i16, N0,
2209 DAG.getNode(ISD::SHL, MVT::i16,
2210 N0, DAG.getConstant(8, MVT::i16)));
2211 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2212 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2216 SDOperand N1 = Op.getOperand(1);
// Logical/unsigned flavor: zero-extend the value operand.
2218 N0 = (N0.getOpcode() != ISD::Constant
2219 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2220 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2221 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2222 N1 = (N1.getOpcode() != ISD::Constant
2223 ? DAG.getNode(N1Opc, MVT::i16, N1)
2224 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2225 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2226 DAG.getNode(Opc, MVT::i16, N0, N1));
2229 SDOperand N1 = Op.getOperand(1);
// Arithmetic/signed flavor: sign-extend both operands.
2231 N0 = (N0.getOpcode() != ISD::Constant
2232 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2233 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2234 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2235 N1 = (N1.getOpcode() != ISD::Constant
2236 ? DAG.getNode(N1Opc, MVT::i16, N1)
2237 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2238 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2239 DAG.getNode(Opc, MVT::i16, N0, N1));
2242 SDOperand N1 = Op.getOperand(1);
2244 N0 = (N0.getOpcode() != ISD::Constant
2245 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2246 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2247 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2248 N1 = (N1.getOpcode() != ISD::Constant
2249 ? DAG.getNode(N1Opc, MVT::i16, N1)
2250 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2251 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2252 DAG.getNode(Opc, MVT::i16, N0, N1));
2260 //! Lower byte immediate operations for v16i8 vectors:
/*!
  For a logical op with one constant BUILD_VECTOR operand (possibly behind
  a BIT_CONVERT, on either side of the operation), check whether the
  constant is a uniform splat.  If so, re-emit it as a BUILD_VECTOR of
  sixteen i8 *target* constants holding the splat's low byte, so that
  instruction selection can fold the operand into the SPU byte-immediate
  instruction forms.
 */
2262 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2265 MVT::ValueType VT = Op.getValueType();
// Start by assuming operand 0 is the constant vector; if it isn't a
// BUILD_VECTOR, peel a BIT_CONVERT and/or swap which operand is treated
// as the constant.
2267 ConstVec = Op.getOperand(0);
2268 Arg = Op.getOperand(1);
2269 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2270 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2271 ConstVec = ConstVec.getOperand(0);
2273 ConstVec = Op.getOperand(1);
2274 Arg = Op.getOperand(0);
2275 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2276 ConstVec = ConstVec.getOperand(0);
2281 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2282 uint64_t VectorBits[2];
2283 uint64_t UndefBits[2];
2284 uint64_t SplatBits, SplatUndef;
// Only a uniform splat constant can be folded into a byte immediate:
2287 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2288 && isConstantSplat(VectorBits, UndefBits,
2289 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2290 SplatBits, SplatUndef, SplatSize)) {
2291 SDOperand tcVec[16];
2292 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2293 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2295 // Turn the BUILD_VECTOR into a set of target constants:
2296 for (size_t i = 0; i < tcVecSize; ++i)
2299 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2300 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2307 //! Lower i32 multiplication
/*!
  The SPU multiplier is 16x16 -> 32 bits, so the full 32-bit product is
  assembled from three partial products (per the SPU mpyh/mpyu semantics):
    mpyh(a,b) = (a_hi * b_lo) << 16
    mpyh(b,a) = (b_hi * a_lo) << 16
    mpyu(a,b) =  a_lo * b_lo
  giving a*b = mpyh(a,b) + mpyh(b,a) + mpyu(a,b).  The same decomposition
  is used for the v4i32 case in LowerVectorMUL above.
 */
2308 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// Diagnostic for value types this lowering does not handle:
2312 cerr << "CellSPU: Unknown LowerMUL value type, got "
2313 << MVT::getValueTypeString(Op.getValueType())
2319 SDOperand rA = Op.getOperand(0);
2320 SDOperand rB = Op.getOperand(1);
2322 return DAG.getNode(ISD::ADD, MVT::i32,
2323 DAG.getNode(ISD::ADD, MVT::i32,
2324 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2325 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2326 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2333 //! Custom lowering for CTPOP (count population)
2335 Custom lowering code that counts the number ones in the input
2336 operand. SPU has such an instruction, but it counts the number of
2337 ones per byte, which then have to be accumulated.
2339 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2340 unsigned VT = Op.getValueType();
// Operate on the 128-bit vector whose element type matches VT:
2341 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8: CNTB's per-byte count for element 0 is already the answer.
2345 SDOperand N = Op.getOperand(0);
2346 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2348 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2349 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2351 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: element 0 of the CNTB result holds two per-byte counts; add the
// high byte's count onto the low byte's and mask off the rest.
2355 MachineFunction &MF = DAG.getMachineFunction();
2356 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2358 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2360 SDOperand N = Op.getOperand(0);
2361 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
// The two byte-counts (each <= 8) sum to at most 16 (0x10), so five bits
// of the sum must survive the final AND.  This mask was previously 0x0f,
// which truncated CTPOP(0xffff) == 16 down to 0.
2362 SDOperand Mask0 = DAG.getConstant(0x1f, MVT::i16);
2363 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2365 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2366 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2368 // CNTB_result becomes the chain to which all of the virtual registers
2369 // CNTB_reg, SUM1_reg become associated:
2370 SDOperand CNTB_result =
2371 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2373 SDOperand CNTB_rescopy =
2374 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2376 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
// Result = ((counts >> 8) + counts) & mask:
2378 return DAG.getNode(ISD::AND, MVT::i16,
2379 DAG.getNode(ISD::ADD, MVT::i16,
2380 DAG.getNode(ISD::SRL, MVT::i16,
// i32: four per-byte counts; two shift-and-add rounds fold them into the
// low byte, which is then masked out (the maximum, 32, fits in 0xff).
2387 MachineFunction &MF = DAG.getMachineFunction();
2388 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2390 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2391 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2393 SDOperand N = Op.getOperand(0);
2394 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2395 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2396 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2397 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2399 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2400 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2402 // CNTB_result becomes the chain to which all of the virtual registers
2403 // CNTB_reg, SUM1_reg become associated:
2404 SDOperand CNTB_result =
2405 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2407 SDOperand CNTB_rescopy =
2408 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// Round 1: fold the upper halfword's counts onto the lower halfword's.
2411 DAG.getNode(ISD::SRL, MVT::i32,
2412 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2415 DAG.getNode(ISD::ADD, MVT::i32,
2416 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2418 SDOperand Sum1_rescopy =
2419 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Round 2: fold the remaining upper byte onto the low byte.
2422 DAG.getNode(ISD::SRL, MVT::i32,
2423 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2426 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2427 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2429 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2439 /// LowerOperation - Provide custom lowering hooks for some operations.
/// Dispatches each custom-lowered opcode to its Lower* helper; opcodes
/// that reach the default case dump diagnostics to cerr.
2442 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2444 switch (Op.getOpcode()) {
2446 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2447 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2448 cerr << "*Op.Val:\n";
// Memory and address nodes need subtarget information:
2455 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2457 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2458 case ISD::ConstantPool:
2459 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2460 case ISD::GlobalAddress:
2461 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2462 case ISD::JumpTable:
2463 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2465 return LowerConstant(Op, DAG);
2466 case ISD::ConstantFP:
2467 return LowerConstantFP(Op, DAG);
2468 case ISD::FORMAL_ARGUMENTS:
2469 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2471 return LowerCALL(Op, DAG);
2473 return LowerRET(Op, DAG, getTargetMachine());
// i8 operations the SPU lacks are widened to i16 (see LowerI8Math):
2482 return LowerI8Math(Op, DAG, Op.getOpcode());
2484 // Vector-related lowering.
2485 case ISD::BUILD_VECTOR:
2486 return LowerBUILD_VECTOR(Op, DAG);
2487 case ISD::SCALAR_TO_VECTOR:
2488 return LowerSCALAR_TO_VECTOR(Op, DAG);
2489 case ISD::VECTOR_SHUFFLE:
2490 return LowerVECTOR_SHUFFLE(Op, DAG);
2491 case ISD::EXTRACT_VECTOR_ELT:
2492 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2493 case ISD::INSERT_VECTOR_ELT:
2494 return LowerINSERT_VECTOR_ELT(Op, DAG);
2496 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2500 return LowerByteImmed(Op, DAG);
2502 // Vector and i8 multiply:
2504 if (MVT::isVector(Op.getValueType()))
2505 return LowerVectorMUL(Op, DAG);
2506 else if (Op.getValueType() == MVT::i8)
2507 return LowerI8Math(Op, DAG, Op.getOpcode());
2509 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only single precision (scalar f32 or v4f32) is custom lowered:
2512 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2513 return LowerFDIVf32(Op, DAG);
2514 // else if (Op.getValueType() == MVT::f64)
2515 // return LowerFDIVf64(Op, DAG);
2517 assert(0 && "Calling FDIV on unsupported MVT");
2520 return LowerCTPOP(Op, DAG);
2526 //===----------------------------------------------------------------------===//
2527 // Other Lowering Code
2528 //===----------------------------------------------------------------------===//
/// InsertAtEndOfBasicBlock - Target hook for instructions that require
/// custom MachineBasicBlock-level expansion after instruction selection.
2531 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2532 MachineBasicBlock *BB)
2537 //===----------------------------------------------------------------------===//
2538 // Target Optimization Hooks
2539 //===----------------------------------------------------------------------===//
//! Target-specific DAG combines: fold away trivial shifts/rotates
//! (constant-zero value or constant-zero amount) on SPU shift/rotate
//! node forms.
2542 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2545 TargetMachine &TM = getTargetMachine();
2546 SelectionDAG &DAG = DCI.DAG;
2548 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2550 switch (N->getOpcode()) {
2553 // Look for obvious optimizations for shift left:
2554 // a) Replace 0 << V with 0
2555 // b) Replace V << 0 with V
2557 // N.B: llvm will generate an undef node if the shift amount is greater than
2558 // 15 (e.g.: V << 16), which will naturally trigger an assert.
// NOTE(review): these case labels are SPU *machine* instruction opcodes,
// while the switch is on SDNode::getOpcode(); confirm selected machine
// nodes actually report these values here.
2561 case SPU::SHLQBIIvec:
2563 case SPU::ROTHIr16_i32:
2565 case SPU::ROTIr32_i16:
2566 case SPU::ROTQBYIvec:
2567 case SPU::ROTQBYBIvec:
2568 case SPU::ROTQBIIvec:
2569 case SPU::ROTHMIr16:
2571 case SPU::ROTQMBYIvec: {
// Constant-zero value operand => the whole shift/rotate is zero.
// NOTE(review): cast<> asserts on failure and never yields null, so the
// pointer tests below are effectively always true; dyn_cast<> would be
// the checked form.
2572 if (N0.getOpcode() == ISD::Constant) {
2573 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2574 if (C->getValue() == 0) // 0 << V -> 0.
// Constant-zero amount => the value operand passes through untouched.
2578 SDOperand N1 = N->getOperand(1);
2579 if (N1.getOpcode() == ISD::Constant) {
2580 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2581 if (C->getValue() == 0) // V << 0 -> V
2592 //===----------------------------------------------------------------------===//
2593 // Inline Assembly Support
2594 //===----------------------------------------------------------------------===//
2596 /// getConstraintType - Given a constraint letter, return the type of
2597 /// constraint it is for this target.
2598 SPUTargetLowering::ConstraintType
2599 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2600 if (ConstraintLetter.size() == 1) {
2601 switch (ConstraintLetter[0]) {
// All recognized single-letter constraints denote register operands:
2608 return C_RegisterClass;
// Anything else: defer to the generic TargetLowering classification.
2611 return TargetLowering::getConstraintType(ConstraintLetter);
/// getRegForInlineAsmConstraint - Map a single-letter inline-asm
/// constraint, given the operand's value type, to a concrete SPU
/// register class.
2614 std::pair<unsigned, const TargetRegisterClass*>
2615 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2616 MVT::ValueType VT) const
2618 if (Constraint.size() == 1) {
2619 // GCC RS6000 Constraint Letters
2620 switch (Constraint[0]) {
// Integer constraints select the 64- or 32-bit class by value type:
2624 return std::make_pair(0U, SPU::R64CRegisterClass);
2625 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraint: width selects single- vs double-precision:
2628 return std::make_pair(0U, SPU::R32FPRegisterClass);
2629 else if (VT == MVT::f64)
2630 return std::make_pair(0U, SPU::R64FPRegisterClass);
// General-purpose fallback class:
2633 return std::make_pair(0U, SPU::GPRCRegisterClass);
2637 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
/// computeMaskedBitsForTargetNode - Report to the DAG combiner which bits
/// of SPU-specific node results are known to be zero or one.
2641 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2643 uint64_t &KnownZero,
2645 const SelectionDAG &DAG,
2646 unsigned Depth ) const {
2651 // LowerAsmOperandForConstraint
/// Lower inline-asm operand Op for the single-letter ConstraintLetter,
/// appending the resulting operands to Ops.
2653 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2654 char ConstraintLetter,
2655 std::vector<SDOperand> &Ops,
2656 SelectionDAG &DAG) {
2657 // Default, for the time being, to the base class handler
2658 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2661 /// isLegalAddressImmediate - Return true if the integer value can be used
2662 /// as the offset of the target addressing mode.
2663 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2664 // SPU's addresses are 256K:
// Accepts roughly an 18-bit signed offset, i.e. the open interval
// (-(1 << 18), (1 << 18) - 1).  NOTE(review): the bounds are asymmetric
// by one at the top (V == (1 << 18) - 1 is rejected while the mirrored
// negative value is accepted); confirm against the offset encoding
// actually used by the addressing modes.
2665 return (V > -(1 << 18) && V < (1 << 18) - 1);
2668 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {