1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/SSARegMap.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// Lazily-populated opcode -> printable-name table; filled on first call to
// SPUTargetLowering::getTargetNodeName().
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
// Each entry pairs a value type with the byte offset of that type's
// "preferred slot" inside a 16-byte SPU register; LowerLOAD uses
// prefslot_byte to compute rotation amounts (see c_rotamt below).
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
// Element count of valtype_map (classic sizeof/sizeof countof idiom).
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
//! Look up the valtype_map entry for a value type.
// Linear scan of the small valtype_map table; retval stays null when VT has
// no entry. NOTE(review): the cerr diagnostic below suggests a failed lookup
// is reported (likely under !NDEBUG) — confirm against the elided lines.
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// Diagnostic emitted when no table entry matches VT:
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an existing D-Form
// Covers both the generic and Target* forms of each address node, plus the
// SPU-specific D-Form address pseudo-node.
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::FrameIndex
94 || Opc == ISD::JumpTable
95 || Opc == ISD::ConstantPool
96 || Opc == ISD::ExternalSymbol
97 || Opc == ISD::TargetGlobalAddress
98 || Opc == ISD::TargetGlobalTLSAddress
99 || Opc == ISD::TargetFrameIndex
100 || Opc == ISD::TargetJumpTable
101 || Opc == ISD::TargetConstantPool
102 || Opc == ISD::TargetExternalSymbol
103 || Opc == SPUISD::DFormAddr);
//! Construct the Cell SPU target lowering: register classes, and per-opcode
//! legalization actions (Legal / Promote / Expand / Custom) that drive
//! SelectionDAG legalization for this target.
107 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
108 : TargetLowering(TM),
111 // Fold away setcc operations if possible.
114 // Use _setjmp/_longjmp instead of setjmp/longjmp.
115 setUseUnderscoreSetJmp(true);
116 setUseUnderscoreLongJmp(true);
118 // Set up the SPU's register classes:
119 // NOTE: i8 register class is not registered because we cannot determine when
120 // we need to zero or sign extend for custom-lowered loads and stores.
121 // NOTE: Ignore the previous note. For now. :-)
122 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
123 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
124 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
125 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
126 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
127 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
128 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
130 // SPU has no sign or zero extended loads for i1, i8, i16:
131 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
132 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
133 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
134 setStoreXAction(MVT::i1, Custom);
136 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
137 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
138 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
139 setStoreXAction(MVT::i8, Custom);
141 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
142 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
143 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
145 // SPU constant load actions are custom lowered:
146 setOperationAction(ISD::Constant, MVT::i64, Custom);
147 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
148 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
150 // SPU's loads and stores have to be custom lowered:
151 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
153 setOperationAction(ISD::LOAD, sctype, Custom);
154 setOperationAction(ISD::STORE, sctype, Custom);
157 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
158 // into BR_CCs. BR_CC instructions are custom selected in
160 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
162 // Expand the jumptable branches
163 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
164 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
165 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
167 // SPU has no intrinsics for these particular operations:
168 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
169 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
170 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
172 // SPU has no SREM/UREM instructions (comment said "PowerPC" —
172 // evidently copied from the PPC backend)
173 setOperationAction(ISD::SREM, MVT::i32, Expand);
174 setOperationAction(ISD::UREM, MVT::i32, Expand);
175 setOperationAction(ISD::SREM, MVT::i64, Expand);
176 setOperationAction(ISD::UREM, MVT::i64, Expand);
178 // We don't support sin/cos/sqrt/fmod
179 setOperationAction(ISD::FSIN , MVT::f64, Expand);
180 setOperationAction(ISD::FCOS , MVT::f64, Expand);
181 setOperationAction(ISD::FREM , MVT::f64, Expand);
182 setOperationAction(ISD::FSIN , MVT::f32, Expand);
183 setOperationAction(ISD::FCOS , MVT::f32, Expand);
184 setOperationAction(ISD::FREM , MVT::f32, Expand);
186 // If we're enabling GP optimizations, use hardware square root
187 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
188 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
190 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
191 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
193 // SPU can do rotate right and left, so legalize it... but customize for i8
194 // because instructions don't exist.
195 setOperationAction(ISD::ROTR, MVT::i32, Legal);
196 setOperationAction(ISD::ROTR, MVT::i16, Legal);
197 setOperationAction(ISD::ROTR, MVT::i8, Custom);
198 setOperationAction(ISD::ROTL, MVT::i32, Legal);
199 setOperationAction(ISD::ROTL, MVT::i16, Legal);
200 setOperationAction(ISD::ROTL, MVT::i8, Custom);
201 // SPU has no native version of shift left/right for i8
202 setOperationAction(ISD::SHL, MVT::i8, Custom);
203 setOperationAction(ISD::SRL, MVT::i8, Custom);
204 setOperationAction(ISD::SRA, MVT::i8, Custom);
206 // Custom lower i32 multiplications
207 setOperationAction(ISD::MUL, MVT::i32, Custom);
209 // Need to custom handle (some) common i8 math ops
210 setOperationAction(ISD::SUB, MVT::i8, Custom);
211 setOperationAction(ISD::MUL, MVT::i8, Custom);
213 // SPU does not have BSWAP; it does support CTLZ for i32.
214 // CTPOP has to be custom lowered.
215 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
216 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
218 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
219 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
220 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
221 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
223 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
224 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
226 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
228 // SPU does not have select or setcc
229 setOperationAction(ISD::SELECT, MVT::i1, Expand);
230 setOperationAction(ISD::SELECT, MVT::i8, Expand);
231 setOperationAction(ISD::SELECT, MVT::i16, Expand);
232 setOperationAction(ISD::SELECT, MVT::i32, Expand);
233 setOperationAction(ISD::SELECT, MVT::i64, Expand);
234 setOperationAction(ISD::SELECT, MVT::f32, Expand);
235 setOperationAction(ISD::SELECT, MVT::f64, Expand);
237 setOperationAction(ISD::SETCC, MVT::i1, Expand);
238 setOperationAction(ISD::SETCC, MVT::i8, Expand);
239 setOperationAction(ISD::SETCC, MVT::i16, Expand);
240 setOperationAction(ISD::SETCC, MVT::i32, Expand);
241 setOperationAction(ISD::SETCC, MVT::i64, Expand);
242 setOperationAction(ISD::SETCC, MVT::f32, Expand);
243 setOperationAction(ISD::SETCC, MVT::f64, Expand);
245 // SPU has a legal FP -> signed INT instruction
246 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
247 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
248 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
249 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
251 // FDIV on SPU requires custom lowering
252 setOperationAction(ISD::FDIV, MVT::f32, Custom);
253 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
255 // SPU has [U|S]INT_TO_FP
256 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
257 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
258 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
259 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
260 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
262 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
263 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
// Bit-preserving reinterpretations between same-width int/FP are free:
265 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
266 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
267 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
268 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
270 // We cannot sextinreg(i1). Expand to shifts.
271 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
273 // Support label based line numbers.
274 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
275 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
277 // We want to legalize GlobalAddress and ConstantPool nodes into the
278 // appropriate instructions to materialize the address.
279 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
280 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
281 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
282 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
283 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
284 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
285 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
286 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
288 // RET must be custom lowered, to meet ABI requirements
289 setOperationAction(ISD::RET, MVT::Other, Custom);
291 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
292 setOperationAction(ISD::VASTART , MVT::Other, Custom);
294 // Use the default implementation.
295 setOperationAction(ISD::VAARG , MVT::Other, Expand);
296 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
297 setOperationAction(ISD::VAEND , MVT::Other, Expand);
298 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
299 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
300 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
301 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
303 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): these i64 conversions were already set to Custom above
// (lines 247/262) — this pair is redundant but harmless.
304 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
305 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
307 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier "Legal" for i32 FP_TO_UINT
// (line 248) — last setting wins; confirm the intent.
308 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
310 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
311 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
313 // First set operation action for all vector types to expand. Then we
314 // will selectively turn on ones that can be effectively codegen'd.
315 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
316 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
319 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
320 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
322 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
323 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
324 // add/sub are legal for all supported vector VT's.
325 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
326 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
327 // mul has to be custom lowered.
328 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
330 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
331 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
334 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
335 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
337 // These operations need to be expanded:
338 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
339 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
341 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
342 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
344 // Custom lower build_vector, constant pool spills, insert and
345 // extract vector elements:
346 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
347 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
350 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
351 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8-specific overrides of the generic vector actions above:
354 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
355 setOperationAction(ISD::AND, MVT::v16i8, Custom);
356 setOperationAction(ISD::OR, MVT::v16i8, Custom);
357 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
358 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
360 setSetCCResultType(MVT::i32);
361 setShiftAmountType(MVT::i32);
362 setSetCCResultContents(ZeroOrOneSetCCResult);
// R1 is the SPU stack pointer (see SPUFrameInfo usage elsewhere).
364 setStackPointerRegisterToSaveRestore(SPU::R1);
366 // We have target-specific dag combine patterns for the following nodes:
367 // e.g., setTargetDAGCombine(ISD::SUB);
369 computeRegisterProperties();
//! Return the printable name of an SPUISD target node, or null if unknown.
// The node_names map is populated lazily on the first call; subsequent calls
// only perform the lookup. NOTE(review): not thread-safe if multiple threads
// call this concurrently during the first population — confirm single-threaded
// use in this LLVM version.
373 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
375 if (node_names.empty()) {
376 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
377 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
378 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
379 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
380 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
381 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
382 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
383 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
384 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
385 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
386 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
387 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
388 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
389 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
390 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
391 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
393 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
394 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
395 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
396 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
397 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
398 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
399 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
400 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
401 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
402 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
403 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
404 "SPUISD::ROTBYTES_RIGHT_Z";
405 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
406 "SPUISD::ROTBYTES_RIGHT_S";
407 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
408 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
409 "SPUISD::ROTBYTES_LEFT_CHAINED";
410 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
411 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
412 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
413 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
414 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
415 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup; returns the stored name or a null pointer for unknown opcodes.
418 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
420 return ((i != node_names.end()) ? i->second : 0);
423 //===----------------------------------------------------------------------===//
424 // Calling convention code:
425 //===----------------------------------------------------------------------===//
427 #include "SPUGenCallingConv.inc"
429 //===----------------------------------------------------------------------===//
430 // LowerOperation implementation
431 //===----------------------------------------------------------------------===//
433 /// Custom lower loads for CellSPU
435 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
436 within a 16-byte block, we have to rotate to extract the requested element.
//
// Strategy (unindexed loads): load the enclosing 16-byte chunk as v16i8,
// rotate the desired element into the type's preferred slot
// (ROTBYTES_LEFT_CHAINED), extract element 0, then apply any sign/zero
// extension. Fully 16-byte-aligned loads skip the rotation. LDRESULT wraps
// the final {value, chain} pair.
439 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
440 LoadSDNode *LN = cast<LoadSDNode>(Op);
441 SDOperand basep = LN->getBasePtr();
442 SDOperand the_chain = LN->getChain();
443 MVT::ValueType BasepOpc = basep.Val->getOpcode();
444 MVT::ValueType VT = LN->getLoadedVT();
445 MVT::ValueType OpVT = Op.Val->getValueType(0);
446 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
447 ISD::LoadExtType ExtType = LN->getExtensionType();
448 unsigned alignment = LN->getAlignment();
449 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
452 if (BasepOpc == ISD::FrameIndex) {
453 // Loading from a frame index is always properly aligned. Always.
457 // For an extending load of an i1 variable, just call it i8 (or whatever we
458 // were passed) and make it zero-extended:
461 ExtType = ISD::ZEXTLOAD;
464 switch (LN->getAddressingMode()) {
465 case ISD::UNINDEXED: {
467 SDOperand rot_op, rotamt;
472 // The vector type we really want to be when we load the 16-byte chunk
473 MVT::ValueType vecVT, opVecVT;
// Full-register vector types with the same element type as VT/OpVT
// (128 bits / element width gives the lane count).
477 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
478 opVecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
// base + constant-offset addressing: split into base pointer and
// a rotation amount derived from the low 4 offset bits.
480 if (basep.getOpcode() == ISD::ADD) {
481 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
484 && "LowerLOAD: ISD::ADD operand 1 is not constant");
486 c_offset = (int) CN->getValue();
487 c_rotamt = (int) (c_offset & 0xf);
489 // Adjust the rotation amount to ensure that the final result ends up in
490 // the preferred slot:
491 c_rotamt -= vtm->prefslot_byte;
492 ptrp = basep.getOperand(0);
// No ADD: offset is zero, only the preferred-slot correction remains.
495 c_rotamt = -vtm->prefslot_byte;
499 if (alignment == 16) {
500 // 16-byte aligned load into preferred slot, no rotation
502 if (isMemoryOperand(ptrp))
506 // Return modified D-Form address for pointer:
507 ptrp = DAG.getNode(SPUISD::DFormAddr, PtrVT,
508 ptrp, DAG.getConstant((c_offset & ~0xf), PtrVT));
510 return DAG.getLoad(VT, LN->getChain(), ptrp,
511 LN->getSrcValue(), LN->getSrcValueOffset(),
512 LN->isVolatile(), 16);
514 return DAG.getExtLoad(ExtType, VT, LN->getChain(), ptrp, LN->getSrcValue(),
515 LN->getSrcValueOffset(), OpVT,
516 LN->isVolatile(), 16);
522 // Realign the base pointer, with a D-Form address
523 if ((c_offset & ~0xf) != 0 || !isMemoryOperand(ptrp))
524 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT,
525 ptrp, DAG.getConstant((c_offset & ~0xf), MVT::i32));
// Load the whole 16-byte chunk, then rotate the wanted bytes into
// the preferred slot:
530 rot_op = DAG.getLoad(MVT::v16i8, the_chain, basep,
531 LN->getSrcValue(), LN->getSrcValueOffset(),
532 LN->isVolatile(), 16);
533 the_chain = rot_op.getValue(1);
534 rotamt = DAG.getConstant(c_rotamt, MVT::i16);
536 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
541 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
542 the_chain = result.getValue(1);
544 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
549 scalarvts = DAG.getVTList(VT, MVT::Other);
551 scalarvts = DAG.getVTList(OpVT, MVT::Other);
554 result = DAG.getNode(ISD::BIT_CONVERT, (OpVT == VT ? vecVT : opVecVT),
558 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
559 the_chain = result.getValue(1);
561 // Handle the sign and zero-extending loads for i1 and i8:
564 if (ExtType == ISD::SEXTLOAD) {
565 NewOpC = (OpVT == MVT::i1
566 ? SPUISD::EXTRACT_I1_SEXT
567 : SPUISD::EXTRACT_I8_SEXT);
569 assert(ExtType == ISD::ZEXTLOAD);
570 NewOpC = (OpVT == MVT::i1
571 ? SPUISD::EXTRACT_I1_ZEXT
572 : SPUISD::EXTRACT_I8_ZEXT);
575 result = DAG.getNode(NewOpC, OpVT, result);
// Package the {value, chain} pair as an LDRESULT node:
578 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
579 SDOperand retops[2] = { result, the_chain };
581 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
586 // Misaligned 16-byte load:
587 if (basep.getOpcode() == ISD::LOAD) {
588 LN = cast<LoadSDNode>(basep);
589 if (LN->getAlignment() == 16) {
590 // We can verify that we're really loading from a 16-byte aligned
591 // chunk. Encapsulate basep as a D-Form address and return a new
593 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, basep,
594 DAG.getConstant(0, PtrVT));
596 return DAG.getLoad(VT, LN->getChain(), basep,
597 LN->getSrcValue(), LN->getSrcValueOffset(),
598 LN->isVolatile(), 16);
600 return DAG.getExtLoad(ExtType, VT, LN->getChain(), basep,
601 LN->getSrcValue(), LN->getSrcValueOffset(),
602 OpVT, LN->isVolatile(), 16);
606 // Catch all other cases where we can't guarantee that we have a
607 // 16-byte aligned entity, which means resorting to an X-form
// address: materialize hi/lo halves of the address and add them.
610 SDOperand ZeroOffs = DAG.getConstant(0, PtrVT);
611 SDOperand loOp = DAG.getNode(SPUISD::Lo, PtrVT, basep, ZeroOffs);
612 SDOperand hiOp = DAG.getNode(SPUISD::Hi, PtrVT, basep, ZeroOffs);
614 ptrp = DAG.getNode(ISD::ADD, PtrVT, loOp, hiOp);
616 SDOperand alignLoad =
617 DAG.getLoad(opVecVT, LN->getChain(), ptrp,
618 LN->getSrcValue(), LN->getSrcValueOffset(),
619 LN->isVolatile(), 16);
// Shuffle the loaded chunk through an insertion mask to position the
// requested element, then extract element 0:
621 SDOperand insertEltOp =
622 DAG.getNode(SPUISD::INSERT_MASK, vecVT, ptrp);
624 result = DAG.getNode(SPUISD::SHUFB, opVecVT,
627 DAG.getNode(ISD::BIT_CONVERT, opVecVT, insertEltOp));
629 result = DAG.getNode(SPUISD::EXTRACT_ELT0, OpVT, result);
631 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
632 SDOperand retops[2] = { result, the_chain };
634 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
// Indexed addressing modes are not supported by this lowering:
643 case ISD::LAST_INDEXED_MODE:
644 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
646 cerr << (unsigned) LN->getAddressingMode() << "\n";
654 /// Custom lower stores for CellSPU
656 All CellSPU stores are aligned to 16-byte boundaries, so for elements
657 within a 16-byte block, we have to generate a shuffle to insert the
658 requested element into its place, then store the resulting block.
//
// Read-modify-write sequence: load the enclosing 16-byte chunk, SHUFB the
// scalar value into its slot via an INSERT_MASK control word, then store
// the whole chunk back.
661 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
662 StoreSDNode *SN = cast<StoreSDNode>(Op);
663 SDOperand Value = SN->getValue();
664 MVT::ValueType VT = Value.getValueType();
665 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
666 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
667 SDOperand the_chain = SN->getChain();
668 //unsigned alignment = SN->getAlignment();
669 //const valtype_map_s *vtm = getValueTypeMapEntry(VT);
671 switch (SN->getAddressingMode()) {
672 case ISD::UNINDEXED: {
673 SDOperand basep = SN->getBasePtr();
677 if (basep.getOpcode() == ISD::FrameIndex) {
678 // FrameIndex nodes are always properly aligned. Really.
// base + constant-offset addressing: peel off the constant offset.
682 if (basep.getOpcode() == ISD::ADD) {
683 const ConstantSDNode *CN = cast<ConstantSDNode>(basep.Val->getOperand(1));
685 && "LowerSTORE: ISD::ADD operand 1 is not constant");
686 offset = unsigned(CN->getValue());
687 ptrOp = basep.getOperand(0);
688 DEBUG(cerr << "LowerSTORE: StoreSDNode ISD:ADD offset = "
696 // The vector type we really want to load from the 16-byte chunk, except
697 // in the case of MVT::i1, which has to be v16i8.
698 unsigned vecVT, stVecVT;
701 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
703 stVecVT = MVT::v16i8;
704 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
706 // Realign the pointer as a D-Form address (ptrOp is the pointer, basep is
707 // the actual dform addr offs($reg).
708 basep = DAG.getNode(SPUISD::DFormAddr, PtrVT, ptrOp,
709 DAG.getConstant((offset & ~0xf), PtrVT));
711 // Create the 16-byte aligned vector load
712 SDOperand alignLoad =
713 DAG.getLoad(vecVT, the_chain, basep,
714 SN->getSrcValue(), SN->getSrcValueOffset(),
715 SN->isVolatile(), 16);
716 the_chain = alignLoad.getValue(1);
718 LoadSDNode *LN = cast<LoadSDNode>(alignLoad);
719 SDOperand theValue = SN->getValue();
723 && (theValue.getOpcode() == ISD::AssertZext
724 || theValue.getOpcode() == ISD::AssertSext)) {
725 // Drill down and get the value for zero- and sign-extended
727 theValue = theValue.getOperand(0);
// Build the shuffle control word that inserts the scalar at the
// chunk-relative byte offset (low 4 bits of the original offset):
730 SDOperand insertEltOp =
731 DAG.getNode(SPUISD::INSERT_MASK, stVecVT,
732 DAG.getNode(SPUISD::DFormAddr, PtrVT,
734 DAG.getConstant((offset & 0xf), PtrVT)));
736 result = DAG.getNode(SPUISD::SHUFB, vecVT,
737 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
739 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
// Store the merged 16-byte chunk back to the aligned address:
741 result = DAG.getStore(the_chain, result, basep,
742 LN->getSrcValue(), LN->getSrcValueOffset(),
743 LN->isVolatile(), LN->getAlignment());
// Indexed addressing modes are not supported.
// NOTE(review): the message below says "LowerLOAD"/"LoadSDNode" but this
// is the store path — apparent copy/paste from LowerLOAD; it should read
// "LowerSTORE ... StoreSDNode" (string left unchanged here).
752 case ISD::LAST_INDEXED_MODE:
753 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
755 cerr << (unsigned) SN->getAddressingMode() << "\n";
763 /// Generate the address of a constant pool entry.
// Small-memory model: the target constant-pool address is used directly.
// Large-memory model: materialize the address as Hi/Lo halves and add them
// (mirrors LowerJumpTable / LowerGlobalAddress below). Only Reloc::Static
// is supported; other relocation models hit the assertion text at the end.
765 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
766 MVT::ValueType PtrVT = Op.getValueType();
767 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
768 Constant *C = CP->getConstVal();
769 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
770 const TargetMachine &TM = DAG.getTarget();
771 SDOperand Zero = DAG.getConstant(0, PtrVT);
773 if (TM.getRelocationModel() == Reloc::Static) {
774 if (!ST->usingLargeMem()) {
775 // Just return the SDOperand with the constant pool address in it.
778 // Generate hi/lo address pair
779 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
780 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
782 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
787 "LowerConstantPool: Relocation model other than static not supported.");
//! Generate the address of a jump table entry.
// Same structure as LowerConstantPool: direct target address for small
// memory model, Hi/Lo pair + ADD for large memory model; static relocation
// only.
792 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
793 MVT::ValueType PtrVT = Op.getValueType();
794 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
795 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
796 SDOperand Zero = DAG.getConstant(0, PtrVT);
797 const TargetMachine &TM = DAG.getTarget();
799 if (TM.getRelocationModel() == Reloc::Static) {
800 if (!ST->usingLargeMem()) {
801 // Just return the SDOperand with the jump table address in it.
804 // Generate hi/lo address pair
805 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
806 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
808 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
813 "LowerJumpTable: Relocation model other than static not supported.");
//! Generate the address of a global variable.
// Same structure as LowerConstantPool / LowerJumpTable: local-store address
// for small memory model, Hi/Lo pair + ADD for large memory model; only the
// static relocation model is handled (the trailing cerr reports anything
// else).
818 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
819 MVT::ValueType PtrVT = Op.getValueType();
820 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
821 GlobalValue *GV = GSDN->getGlobal();
822 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
823 SDOperand Zero = DAG.getConstant(0, PtrVT);
824 const TargetMachine &TM = DAG.getTarget();
826 if (TM.getRelocationModel() == Reloc::Static) {
827 if (!ST->usingLargeMem()) {
828 // Generate a local store address
831 // Generate hi/lo address pair
832 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
833 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
835 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
838 cerr << "LowerGlobalAddress: Relocation model other than static not "
847 //! Custom lower i64 integer constants
849 This code inserts all of the necessary juggling that needs to occur to load
850 a 64-bit constant into a register.
// Implementation: splat the constant into both lanes of a v2i64
// BUILD_VECTOR, then pull lane 0 back out with EXTRACT_ELT0. Any other
// value type falls through to the diagnostic below.
853 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
854 unsigned VT = Op.getValueType();
855 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
857 if (VT == MVT::i64) {
858 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
859 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
860 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Unhandled constant types are reported:
863 cerr << "LowerConstant: unhandled constant type "
864 << MVT::getValueTypeString(VT)
873 //! Custom lower single precision floating point constants
875 "float" immediates can be lowered as if they were unsigned 32-bit integers.
876 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
// f32: wrap the immediate in an SFPConstant pseudo node.
// f64: reinterpret the IEEE-754 bit pattern as an i64 constant, lower that
// through LowerConstant, and BIT_CONVERT the result back to f64.
880 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
881 unsigned VT = Op.getValueType();
882 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
885 "LowerConstantFP: Node is not ConstantFPSDNode");
887 if (VT == MVT::f32) {
888 float targetConst = FP->getValueAPF().convertToFloat();
889 return DAG.getNode(SPUISD::SFPConstant, VT,
890 DAG.getTargetConstantFP(targetConst, VT));
891 } else if (VT == MVT::f64) {
892 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
893 return DAG.getNode(ISD::BIT_CONVERT, VT,
894 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
// Lower ISD::FORMAL_ARGUMENTS for the Cell SPU: each visible case copies an
// incoming argument out of the next SPU argument register (via a fresh
// virtual register of the matching register class); arguments that do not
// land in a register are loaded from fixed stack slots.  For vararg
// functions, the remaining argument registers are spilled to the stack so
// va_arg can walk them.  Returns a MERGE_VALUES node of all argument values
// plus the output chain.
// NOTE(review): this listing is elided — the switch dispatching on ObjectVT,
// the ArgRegIdx increments, and several closing braces are not visible here.
901 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
903 MachineFunction &MF = DAG.getMachineFunction();
904 MachineFrameInfo *MFI = MF.getFrameInfo();
905 SSARegMap *RegMap = MF.getSSARegMap();
906 SmallVector<SDOperand, 8> ArgValues;
// Operand 0 is the incoming chain; operand 2 encodes the vararg flag.
907 SDOperand Root = Op.getOperand(0);
908 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
910 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
911 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack arguments start just above the minimal frame (linkage area).
913 unsigned ArgOffset = SPUFrameInfo::minStackSize();
914 unsigned ArgRegIdx = 0;
915 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
917 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
919 // Add DAG nodes to load the arguments or copy them out of registers.
// The node's last result value is the output chain, hence NumValues-1.
920 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
922 bool needsLoad = false;
923 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
924 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
// Diagnostic for value types with no lowering case below.
928 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
929 << MVT::getValueTypeString(ObjectVT)
// i8 arguments: copy out of an R8C register if one is still available.
// NOTE(review): the "!isVarArg" guard forces ALL arguments of a vararg
// function onto the stack, not just the variadic tail — verify against
// the SPU ABI; this looks overly conservative.
934 if (!isVarArg && ArgRegIdx < NumArgRegs) {
935 unsigned VReg = RegMap->createVirtualRegister(&SPU::R8CRegClass);
936 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
937 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 arguments (R16C register class):
944 if (!isVarArg && ArgRegIdx < NumArgRegs) {
945 unsigned VReg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
946 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
947 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 arguments (R32C register class):
954 if (!isVarArg && ArgRegIdx < NumArgRegs) {
955 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
956 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
957 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 arguments (R64C register class):
964 if (!isVarArg && ArgRegIdx < NumArgRegs) {
965 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64CRegClass);
966 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
967 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 arguments (R32FP register class):
974 if (!isVarArg && ArgRegIdx < NumArgRegs) {
975 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
976 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
977 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 arguments (R64FP register class):
984 if (!isVarArg && ArgRegIdx < NumArgRegs) {
985 unsigned VReg = RegMap->createVirtualRegister(&SPU::R64FPRegClass);
986 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
987 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments keep their own value type (VECREG register class):
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1000 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1009 // We need to load the argument to a virtual register if we determined above
1010 // that we ran out of physical registers of the appropriate type
1012 // If the argument is actually used, emit a load from the right stack
1014 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1015 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1016 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1017 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1019 // Don't emit a dead load.
1020 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
// Every stack-passed argument consumes a full stack slot.
1023 ArgOffset += StackSlotSize;
1026 ArgValues.push_back(ArgVal);
1029 // If the function takes variable number of arguments, make a frame index for
1030 // the start of the first vararg value... for expansion of llvm.va_start.
1032 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1034 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1035 // If this function is vararg, store any remaining integer argument regs to
1036 // their spots on the stack so that they may be loaded by deferencing the
1037 // result of va_next.
1038 SmallVector<SDOperand, 8> MemOps;
1039 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1040 unsigned VReg = RegMap->createVirtualRegister(&SPU::GPRCRegClass);
1041 MF.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1042 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1043 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1044 MemOps.push_back(Store);
1045 // Increment the address by four for the next argument to store
// NOTE(review): despite the comment above, the increment is actually
// pointer-size bytes (getSizeInBits(PtrVT)/8), not a hard-coded four.
1046 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1047 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Merge all vararg spill stores into one TokenFactor on the chain.
1049 if (!MemOps.empty())
1050 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1053 ArgValues.push_back(Root);
1055 // Return the new list of results.
1056 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1057 Op.Val->value_end());
1058 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1061 /// isLSAAddress - Return the immediate to use if the specified
1062 /// value is representable as a LSA address.
// Returns a constant node holding the word index (address >> 2) when Op is
// a compile-time constant that fits the local-store-address form: word
// aligned and representable in 18 sign-extended bits.  Returns 0 otherwise.
// NOTE(review): the elided line between 1064 and 1067 presumably bails out
// when the dyn_cast yields null — confirm before relying on C below.
1063 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1064 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1067 int Addr = C->getValue();
1068 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
// Shifting left then right by 14 checks Addr fits in 18 signed bits.
1069 (Addr << 14 >> 14) != Addr)
1070 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word index: drop the two implicit low zero bits.
1072 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL for the Cell SPU: distribute outgoing arguments between
// the SPU argument registers and stack slots, emit the
// CALLSEQ_START/SPUISD::CALL/CALLSEQ_END sequence, and copy return values
// out of R3 (and R4 for the two-result i32 case).
// NOTE(review): this listing is elided — switch case labels, else-arms and
// closing braces between the visible lines are not shown here.
1077 LowerCALL(SDOperand Op, SelectionDAG &DAG) {
1078 SDOperand Chain = Op.getOperand(0);
// Operands 2/3 carry the vararg and tail-call flags; operand 4 the callee.
1080 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1081 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1083 SDOperand Callee = Op.getOperand(4);
// Arguments come in (value, flag) pairs after the first five operands.
1084 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1085 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1086 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1087 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1089 // Handy pointer type
1090 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1092 // Accumulate how many bytes are to be pushed on the stack, including the
1093 // linkage area, and parameter passing area. According to the SPU ABI,
1094 // we minimally need space for [LR] and [SP]
1095 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1097 // Set up a copy of the stack pointer for use loading and storing any
1098 // arguments that may not fit in the registers available for argument
// R1 is the SPU stack pointer.
1100 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1102 // Figure out which arguments are going to go in registers, and which in
1104 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1105 unsigned ArgRegIdx = 0;
1107 // Keep track of registers passing arguments
1108 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1109 // And the arguments passed on the stack
1110 SmallVector<SDOperand, 8> MemOpChains;
1112 for (unsigned i = 0; i != NumOps; ++i) {
1113 SDOperand Arg = Op.getOperand(5+2*i);
1115 // PtrOff will be used to store the current argument to the stack if a
1116 // register cannot be found for it.
1117 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1118 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1120 switch (Arg.getValueType()) {
1121 default: assert(0 && "Unexpected ValueType for argument!");
// Each visible case: use the next argument register if any remain,
// otherwise store to the argument's stack slot.
1125 if (ArgRegIdx != NumArgRegs) {
1126 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1128 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1129 ArgOffset += StackSlotSize;
1134 if (ArgRegIdx != NumArgRegs) {
1135 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1137 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1138 ArgOffset += StackSlotSize;
1145 if (ArgRegIdx != NumArgRegs) {
1146 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1148 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1149 ArgOffset += StackSlotSize;
1155 // Update number of stack bytes actually used, insert a call sequence start
1156 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1157 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1159 if (!MemOpChains.empty()) {
1160 // Adjust the stack pointer for the stack arguments.
1161 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1162 &MemOpChains[0], MemOpChains.size());
1165 // Build a sequence of copy-to-reg nodes chained together with token chain
1166 // and flag operands which copy the outgoing args into the appropriate regs.
1168 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1169 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1171 InFlag = Chain.getValue(1);
1174 std::vector<MVT::ValueType> NodeTys;
1175 NodeTys.push_back(MVT::Other); // Returns a chain
1176 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1178 SmallVector<SDOperand, 8> Ops;
1179 unsigned CallOpc = SPUISD::CALL;
1181 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1182 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1183 // node so that legalize doesn't hack it.
1184 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1185 GlobalValue *GV = G->getGlobal();
1186 unsigned CalleeVT = Callee.getValueType();
1188 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1189 // style calls, otherwise, external symbols are BRASL calls.
1191 // This may be an unsafe assumption for JIT and really large compilation
// Declarations (no body in this unit) use a plain global address;
// defined targets go through a PC-relative address node.
1193 if (GV->isDeclaration()) {
1194 Callee = DAG.getGlobalAddress(GV, CalleeVT);
1196 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT,
1197 DAG.getTargetGlobalAddress(GV, CalleeVT),
1198 DAG.getConstant(0, PtrVT));
1200 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1201 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1202 else if (SDNode *Dest = isLSAAddress(Callee, DAG))
1203 // If this is an absolute destination address that appears to be a legal
1204 // local store address, use the munged value.
1205 Callee = SDOperand(Dest, 0);
1207 Ops.push_back(Chain);
1208 Ops.push_back(Callee);
1210 // Add argument registers to the end of the list so that they are known live
1212 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1213 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1214 RegsToPass[i].second.getValueType()));
1217 Ops.push_back(InFlag);
1218 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1219 InFlag = Chain.getValue(1);
1221 SDOperand ResultVals[3];
1222 unsigned NumResults = 0;
1225 // If the call has results, copy the values out of the ret val registers.
1226 switch (Op.Val->getValueType(0)) {
1227 default: assert(0 && "Unexpected ret value!");
1228 case MVT::Other: break;
// Two i32 results: first from R4, second from R3 (glued together).
1230 if (Op.Val->getValueType(1) == MVT::i32) {
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1232 ResultVals[0] = Chain.getValue(0);
1233 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1234 Chain.getValue(2)).getValue(1);
1235 ResultVals[1] = Chain.getValue(0);
1237 NodeTys.push_back(MVT::i32);
// Single i32 result comes back in R3.
1239 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1240 ResultVals[0] = Chain.getValue(0);
1243 NodeTys.push_back(MVT::i32);
// i64 result in R3.
1246 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1247 ResultVals[0] = Chain.getValue(0);
1249 NodeTys.push_back(MVT::i64);
// Other scalar result types: copy out of R3 with the node's own type.
1253 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1254 InFlag).getValue(1);
1255 ResultVals[0] = Chain.getValue(0);
1257 NodeTys.push_back(Op.Val->getValueType(0));
// Vector results likewise come back in R3.
1264 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1265 InFlag).getValue(1);
1266 ResultVals[0] = Chain.getValue(0);
1268 NodeTys.push_back(Op.Val->getValueType(0));
1272 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1273 DAG.getConstant(NumStackBytes, PtrVT));
1274 NodeTys.push_back(MVT::Other);
1276 // If the function returns void, just return the chain.
1277 if (NumResults == 0)
1280 // Otherwise, merge everything together with a MERGE_VALUES node.
1281 ResultVals[NumResults++] = Chain;
1282 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1283 ResultVals, NumResults);
1284 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the SPU return-value calling convention (RetCC_SPU),
// mark the return registers live-out, copy each returned value into its
// assigned physical register, and emit SPUISD::RET_FLAG.
1288 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1289 SmallVector<CCValAssign, 16> RVLocs;
1290 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1291 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1292 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1293 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1295 // If this is the first return lowered for this function, add the regs to the
1296 // liveout set for the function.
1297 if (DAG.getMachineFunction().liveout_empty()) {
1298 for (unsigned i = 0; i != RVLocs.size(); ++i)
1299 DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
1302 SDOperand Chain = Op.getOperand(0);
1305 // Copy the result values into the output registers.
// RET operands are (chain, value, flag) pairs, hence the i*2+1 indexing.
// Each CopyToReg is glued to the next through Flag.
1306 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1307 CCValAssign &VA = RVLocs[i];
1308 assert(VA.isRegLoc() && "Can only return in registers!");
1309 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1310 Flag = Chain.getValue(1);
// With return values, glue the flag into RET_FLAG; otherwise return
// just the chain (the selecting "if" around these lines is elided).
1314 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1316 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1320 //===----------------------------------------------------------------------===//
1321 // Vector related lowering:
1322 //===----------------------------------------------------------------------===//
// If build-vector node N is a splat of one ConstantSDNode (ignoring UNDEF
// elements), return that constant; return 0 when the elements disagree,
// are non-constant, or are all UNDEF.
1324 static ConstantSDNode *
1325 getVecImm(SDNode *N) {
1326 SDOperand OpVal(0, 0);
1328 // Check to see if this buildvec has a single non-undef value in its elements.
1329 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1330 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// Remember the first non-undef element; every later one must match it.
1332 OpVal = N->getOperand(i);
1333 else if (OpVal != N->getOperand(i))
1337 if (OpVal.Val != 0) {
1338 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1343 return 0; // All UNDEF: use implicit def.; not Constant node
1346 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1347 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// (constant; otherwise an elided "return SDOperand()" fall-through applies.)
1349 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1350 MVT::ValueType ValueType) {
1351 if (ConstantSDNode *CN = getVecImm(N)) {
1352 uint64_t Value = CN->getValue();
// 0x3ffff == 2^18 - 1: the largest unsigned 18-bit immediate.
1353 if (Value <= 0x3ffff)
1354 return DAG.getConstant(Value, ValueType);
1360 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1361 /// and the value fits into a signed 16-bit constant, and if so, return the
// constant.  The sign-extension check is done per element type (i16/i32/i64).
1363 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1364 MVT::ValueType ValueType) {
1365 if (ConstantSDNode *CN = getVecImm(N)) {
1366 if (ValueType == MVT::i32) {
1367 int Value = (int) CN->getValue();
// Sign-extend the low 16 bits and require a round trip.
1368 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1370 if (Value == SExtValue)
1371 return DAG.getConstant(Value, ValueType);
1372 } else if (ValueType == MVT::i16) {
1373 short Value = (short) CN->getValue();
1374 int SExtValue = ((int) Value << 16) >> 16;
1376 if (Value == (short) SExtValue)
1377 return DAG.getConstant(Value, ValueType);
1378 } else if (ValueType == MVT::i64) {
1379 int64_t Value = CN->getValue();
// Same round-trip check, widened to 64 bits.
1380 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1382 if (Value == SExtValue)
1383 return DAG.getConstant(Value, ValueType);
1390 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1391 /// and the value fits into a signed 10-bit constant, and if so, return the
// constant (via the isS10Constant range predicate).
1393 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1394 MVT::ValueType ValueType) {
1395 if (ConstantSDNode *CN = getVecImm(N)) {
1396 int Value = (int) CN->getValue();
// For i16 elements the value is first truncated to short before testing.
1397 if ((ValueType == MVT::i32 && isS10Constant(Value))
1398 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1399 return DAG.getConstant(Value, ValueType);
1405 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1406 /// and the value fits into a signed 8-bit constant, and if so, return the
// constant.
1409 /// @note: The incoming vector is v16i8 because that's the only way we can load
1410 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// same (the i16 case), or the value already fits in one byte (the i8 case).
1412 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1413 MVT::ValueType ValueType) {
1414 if (ConstantSDNode *CN = getVecImm(N)) {
1415 int Value = (int) CN->getValue();
// i16: accept only when the high byte equals the low byte.
// NOTE(review): "(short) Value >> 8" sign-extends the high byte while the
// right side is masked unsigned — verify this comparison for values with
// the top bit of the high byte set.
1416 if (ValueType == MVT::i16
1417 && Value <= 0xffff /* truncated from uint64_t */
1418 && ((short) Value >> 8) == ((short) Value & 0xff))
1419 return DAG.getConstant(Value & 0xff, ValueType);
1420 else if (ValueType == MVT::i8
1421 && (Value & 0xff) == Value)
1422 return DAG.getConstant(Value, ValueType);
1428 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1429 /// and the value fits into a signed 16-bit constant, and if so, return the
// constant shifted right by 16 — i.e. the immediate operand for ILHU
// ("immediate load halfword upper"), which requires the low 16 bits clear.
1431 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1432 MVT::ValueType ValueType) {
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 uint64_t Value = CN->getValue();
// Accept only values whose low 16 bits are zero (and, for i64, whose
// upper 32 bits are zero as well, per the 0xffff0000 mask).
1435 if ((ValueType == MVT::i32
1436 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1437 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1438 return DAG.getConstant(Value >> 16, ValueType);
1444 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splatted value as an i32 constant when N splats a constant.
1445 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1446 if (ConstantSDNode *CN = getVecImm(N)) {
1447 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1453 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
// Returns the splatted value as an i64 constant when N splats a constant.
// NOTE(review): the "(unsigned)" cast truncates the 64-bit value to 32 bits
// before widening back to i64 — verify this is intended for v2i64 splats.
1454 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1455 if (ConstantSDNode *CN = getVecImm(N)) {
1456 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1462 // If this is a vector of constants or undefs, get the bits. A bit in
1463 // UndefBits is set if the corresponding element of the vector is an
1464 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1465 // zero. Return true if this is not an array of constants, false if it is.
1467 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1468 uint64_t UndefBits[2]) {
1469 // Start with zero'd results.
1470 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
// All elements of a BUILD_VECTOR have the same type; size in bits of one.
1472 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1473 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1474 SDOperand OpVal = BV->getOperand(i);
// Map element i to (which uint64_t, which bit position within it).
1476 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1477 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1479 uint64_t EltBits = 0;
1480 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record an all-ones mask for this element in UndefBits.
1481 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1482 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1484 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1485 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1486 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw IEEE bit pattern.
1487 const APFloat &apf = CN->getValueAPF();
1488 EltBits = (CN->getValueType(0) == MVT::f32
1489 ? FloatToBits(apf.convertToFloat())
1490 : DoubleToBits(apf.convertToDouble()))
1492 // Nonconstant element.
1496 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1499 //printf("%llx %llx %llx %llx\n",
1500 //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1504 /// If this is a splat (repetition) of a value across the whole vector, return
1505 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1506 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1507 /// SplatSize = 1 byte.
// MinSplatBits bounds how small a splat may be reported; undef bits never
// prevent a match (they are treated as "don't care").
1508 static bool isConstantSplat(const uint64_t Bits128[2],
1509 const uint64_t Undef128[2],
1511 uint64_t &SplatBits, uint64_t &SplatUndef,
1513 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1514 // the same as the lower 64-bits, ignoring undefs.
// Fold each width in half: OR the defined bits, AND the undef masks.
1515 uint64_t Bits64 = Bits128[0] | Bits128[1];
1516 uint64_t Undef64 = Undef128[0] & Undef128[1];
1517 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1518 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1519 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1520 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Each level compares the two halves modulo the other half's undef mask;
// matching halves descend to the next-smaller splat width.
1522 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1523 if (MinSplatBits < 64) {
1525 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1527 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1528 if (MinSplatBits < 32) {
1530 // If the top 16-bits are different than the lower 16-bits, ignoring
1531 // undefs, we have an i32 splat.
1532 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1533 if (MinSplatBits < 16) {
1534 // If the top 8-bits are different than the lower 8-bits, ignoring
1535 // undefs, we have an i16 splat.
1536 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1537 // Otherwise, we have an 8-bit splat.
1538 SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
1539 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// Fallback assignments for the 16-, 32- and 64/128-bit splat cases
// (the surrounding else-branches and SplatSize stores are elided here).
1545 SplatUndef = Undef16;
1552 SplatUndef = Undef32;
1558 SplatBits = Bits128[0];
1559 SplatUndef = Undef128[0];
1565 return false;  // Can't be a splat if two pieces don't match.
1568 // If this is a case we can't handle, return null and let the default
1569 // expansion code take care of it. If we CAN select this case, and if it
1570 // selects to a single instruction, return Op. Otherwise, if we can codegen
1571 // this case more efficiently than a constant pool load, lower it to the
1572 // sequence of ops that should be used.
1573 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1574 MVT::ValueType VT = Op.getValueType();
1575 // If this is a vector of constants or undefs, get the bits. A bit in
1576 // UndefBits is set if the corresponding element of the vector is an
1577 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1579 uint64_t VectorBits[2];
1580 uint64_t UndefBits[2];
1581 uint64_t SplatBits, SplatUndef;
1583 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1584 || !isConstantSplat(VectorBits, UndefBits,
1585 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1586 SplatBits, SplatUndef, SplatSize))
1587 return SDOperand();   // Not a constant vector, not a splat.
// The switch over VT is elided; each visible case builds the splat for one
// vector type.
// v4f32: rebuild as an integer splat and bitcast back.
1592 uint32_t Value32 = SplatBits;
1593 assert(SplatSize == 4
1594 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1597 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1598 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same trick with a 64-bit integer splat.
1602 uint64_t f64val = SplatBits;
1603 assert(SplatSize == 8
1604 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1605 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1607 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1608 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 path:
1612 // 8-bit constants have to be expanded to 16-bits
1613 unsigned short Value16 = SplatBits | (SplatBits << 8);
1615 for (int i = 0; i < 8; ++i)
1616 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1617 return DAG.getNode(ISD::BIT_CONVERT, VT,
1618 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 path: widen an 8-bit splat to 16 bits when needed (the branch
// discriminating SplatSize is elided).
1621 unsigned short Value16;
1623 Value16 = (unsigned short) (SplatBits & 0xffff);
1625 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1626 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1628 for (int i = 0; i < 8; ++i) Ops[i] = T;
1629 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 path:
1632 unsigned int Value = SplatBits;
1633 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1634 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 path: split the 64-bit splat into 32-bit halves.
1637 uint64_t val = SplatBits;
1638 uint32_t upper = uint32_t(val >> 32);
1639 uint32_t lower = uint32_t(val);
1644 SmallVector<SDOperand, 16> ShufBytes;
1646 bool upper_special, lower_special;
1648 // NOTE: This code creates common-case shuffle masks that can be easily
1649 // detected as common expressions. It is not attempting to create highly
1650 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1652 // Detect if the upper or lower half is a special shuffle mask pattern:
1653 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1654 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1656 // Create lower vector if not a special pattern
1657 if (!lower_special) {
1658 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1659 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1660 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1661 LO32C, LO32C, LO32C, LO32C));
1664 // Create upper vector if not a special pattern
1665 if (!upper_special) {
1666 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1667 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1668 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1669 HI32C, HI32C, HI32C, HI32C));
1672 // If either upper or lower are special, then the two input operands are
1673 // the same (basically, one of them is a "don't care")
1678 if (lower_special && upper_special) {
1679 // Unhappy situation... both upper and lower are special, so punt with
1680 // a target constant:
1681 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1682 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control mask; special halves encode the SHUFB
// "generate 0x00 / 0xff / 0x80" byte codes instead of source indices.
1686 for (int i = 0; i < 4; ++i) {
1687 for (int j = 0; j < 4; ++j) {
1689 bool process_upper, process_lower;
// Even word positions take the upper half, odd positions the lower.
1692 process_upper = (upper_special && (i & 1) == 0);
1693 process_lower = (lower_special && (i & 1) == 1);
1695 if (process_upper || process_lower) {
1696 if ((process_upper && upper == 0)
1697 || (process_lower && lower == 0))
1699 else if ((process_upper && upper == 0xffffffff)
1700 || (process_lower && lower == 0xffffffff))
1702 else if ((process_upper && upper == 0x80000000)
1703 || (process_lower && lower == 0x80000000))
1704 val = (j == 0 ? 0xe0 : 0x80);
// Otherwise select the corresponding byte from HI32/LO32.
1706 val = i * 4 + j + ((i & 1) * 16);
1708 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1712 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1713 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1714 &ShufBytes[0], ShufBytes.size()));
1716 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1717 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1718 return DAG.getNode(ISD::BIT_CONVERT, VT,
1719 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1720 Zero, Zero, Zero, Zero));
1728 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1730 /// permutation vector, V3, is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result is going to eventually invoke SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1736 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1737 /// control word for byte/halfword/word insertion. This takes care of a single
1738 /// element move from V2 into V1.
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1741 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1742 SDOperand V1 = Op.getOperand(0);
1743 SDOperand V2 = Op.getOperand(1);
1744 SDOperand PermMask = Op.getOperand(2);
// An undef second operand means both inputs are the same vector.
1746 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1748 // If we have a single element being moved from V1 to V2, this can be handled
1749 // using the C*[DX] compute mask instructions, but the vector elements have
1750 // to be monotonically increasing with one exception element.
1751 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1752 unsigned EltsFromV2 = 0;
// V2EltIdx0 = index at which mask entries start referring to V2 (set in
// the elided branches: 16 for i8, 8 for i16, 4 for i32 elements).
1754 unsigned V2EltIdx0 = 0;
1755 unsigned CurrElt = 0;
1756 bool monotonic = true;
1757 if (EltVT == MVT::i8)
1759 else if (EltVT == MVT::i16)
1761 else if (EltVT == MVT::i32)
1764 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; bail as soon as more than one element comes from V2 or
// the V1 elements stop being consecutive.
1766 for (unsigned i = 0, e = PermMask.getNumOperands();
1767 EltsFromV2 <= 1 && monotonic && i != e;
1770 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1773 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1775 if (SrcElt >= V2EltIdx0) {
// Record the byte offset of the single V2 element.
// NOTE(review): "(V2EltIdx0 - SrcElt)" yields a non-positive value
// shifted left by 2 — confirm the intended sign of this offset.
1777 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1778 } else if (CurrElt != SrcElt) {
1785 if (EltsFromV2 == 1 && monotonic) {
1786 // Compute mask and shuffle
1787 MachineFunction &MF = DAG.getMachineFunction();
1788 SSARegMap *RegMap = MF.getSSARegMap();
1789 unsigned VReg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
1790 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1791 // Initialize temporary register to 0
1792 SDOperand InitTempReg =
1793 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1794 // Copy register's contents as index in INSERT_MASK:
1795 SDOperand ShufMaskOp =
1796 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1797 DAG.getTargetConstant(V2Elt, MVT::i32),
1798 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1799 // Use shuffle mask in SHUFB synthetic instruction:
1800 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1802 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1803 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1805 SmallVector<SDOperand, 16> ResultMask;
1806 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
// Undef mask entries select an arbitrary source element (set in an
// elided line); defined entries use the mask's constant index.
1808 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1811 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand the element index into its constituent byte indices.
1813 for (unsigned j = 0; j != BytesPerElement; ++j) {
1814 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1819 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1820 &ResultMask[0], ResultMask.size());
1821 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes an explicit
// constant BUILD_VECTOR (which later simplifies to a vector-register
// load); any other scalar is broadcast with SPUISD::PROMOTE_SCALAR.
1825 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1826 SDOperand Op0 = Op.getOperand(0);     // Op0 = the scalar
1828 if (Op0.Val->getOpcode() == ISD::Constant) {
1829 // For a constant, build the appropriate constant vector, which will
1830 // eventually simplify to a vector register load.
1832 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1833 SmallVector<SDOperand, 16> ConstVecValues;
1837 // Create a constant vector:
// Pick the element count and element type from the result vector type.
1838 switch (Op.getValueType()) {
1839 default: assert(0 && "Unexpected constant value type in "
1840 "LowerSCALAR_TO_VECTOR");
1841 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1842 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1843 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1844 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1845 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1846 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element.
1849 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1850 for (size_t j = 0; j < n_copies; ++j)
1851 ConstVecValues.push_back(CValue);
1853 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1854 &ConstVecValues[0], ConstVecValues.size());
1856 // Otherwise, copy the value from one register to another:
1857 switch (Op0.getValueType()) {
1858 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// PROMOTE_SCALAR takes the scalar twice per this node's convention.
1865 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1872 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1873 switch (Op.getValueType()) {
1875 SDOperand rA = Op.getOperand(0);
1876 SDOperand rB = Op.getOperand(1);
1877 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1878 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1879 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1880 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1882 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1886 // Multiply two v8i16 vectors (pipeline friendly version):
1887 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1888 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1889 // c) Use SELB to select upper and lower halves from the intermediate results
1891 // NOTE: We really want to move the FSMBI to earlier to actually get the
1892 // dual-issue. This code does manage to do this, even if it's a little on
1895 MachineFunction &MF = DAG.getMachineFunction();
1896 SSARegMap *RegMap = MF.getSSARegMap();
1897 SDOperand Chain = Op.getOperand(0);
1898 SDOperand rA = Op.getOperand(0);
1899 SDOperand rB = Op.getOperand(1);
1900 unsigned FSMBIreg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1901 unsigned HiProdReg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1904 DAG.getCopyToReg(Chain, FSMBIreg,
1905 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1906 DAG.getConstant(0xcccc, MVT::i32)));
1909 DAG.getCopyToReg(FSMBOp, HiProdReg,
1910 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1912 SDOperand HHProd_v4i32 =
1913 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1914 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1916 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1917 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1918 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1919 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1921 DAG.getConstant(16, MVT::i16))),
1922 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1925 // This M00sE is N@stI! (apologies to Monty Python)
1927 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1928 // is to break it all apart, sign extend, and reassemble the various
1929 // intermediate products.
1931 MachineFunction &MF = DAG.getMachineFunction();
1932 SSARegMap *RegMap = MF.getSSARegMap();
1933 SDOperand Chain = Op.getOperand(0);
1934 SDOperand rA = Op.getOperand(0);
1935 SDOperand rB = Op.getOperand(1);
1936 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1937 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1939 unsigned FSMBreg_2222 = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1940 unsigned LoProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1941 unsigned HiProd_reg = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
1944 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1945 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1946 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1948 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1950 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1953 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1954 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1956 SDOperand FSMBdef_2222 =
1957 DAG.getCopyToReg(Chain, FSMBreg_2222,
1958 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1959 DAG.getConstant(0x2222, MVT::i32)));
1961 SDOperand FSMBuse_2222 =
1962 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1964 SDOperand LoProd_1 =
1965 DAG.getCopyToReg(Chain, LoProd_reg,
1966 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
1969 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1972 DAG.getNode(ISD::AND, MVT::v4i32,
1973 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1974 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1975 LoProdMask, LoProdMask,
1976 LoProdMask, LoProdMask));
1979 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1983 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1987 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1991 SDOperand HHProd_1 =
1992 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1993 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1994 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1995 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1996 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1999 DAG.getCopyToReg(Chain, HiProd_reg,
2000 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2002 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2006 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2007 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2009 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2010 DAG.getNode(ISD::OR, MVT::v4i32,
2015 cerr << "CellSPU: Unknown vector multiplication, got "
2016 << MVT::getValueTypeString(Op.getValueType())
// Lower FDIV for f32 (and, per the VECREG branch below, presumably v4f32 —
// confirm against the caller in LowerOperation) using the SPU
// reciprocal-estimate sequence:
//   BRcpl  = FPInterp(B, FPRecipEst(B))     -- refined estimate of 1/B
//   C      = A * BRcpl
//   result = C + BRcpl * (A - B * C)        -- one Newton-Raphson correction
// NOTE(review): this listing is missing lines (the else-branch braces around
// the VECREG register-class case and the BRcpl/AxBRcpl declarations);
// annotated as-is.
2025 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2026 MachineFunction &MF = DAG.getMachineFunction();
2027 SSARegMap *RegMap = MF.getSSARegMap();
2029 SDOperand A = Op.getOperand(0);
2030 SDOperand B = Op.getOperand(1);
2031 unsigned VT = Op.getValueType();
2033 unsigned VRegBR, VRegC;
// Scalar f32 uses the 32-bit FP register class; the other case uses the
// 128-bit vector register class.
2035 if (VT == MVT::f32) {
2036 VRegBR = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2037 VRegC = RegMap->createVirtualRegister(&SPU::R32FPRegClass);
2039 VRegBR = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2040 VRegC = RegMap->createVirtualRegister(&SPU::VECREGRegClass);
2042 // TODO: make sure we're feeding FPInterp the right arguments
2043 // Right now: fi B, frest(B)
// Pin the refined reciprocal estimate of B into VRegBR:
2046 // (Floating Interpolate (FP Reciprocal Estimate B))
2048 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2049 DAG.getNode(SPUISD::FPInterp, VT, B,
2050 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2052 // Computes A * BRcpl and stores in a temporary register
2054 DAG.getCopyToReg(BRcpl, VRegC,
2055 DAG.getNode(ISD::FMUL, VT, A,
2056 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2057 // What's the Chain variable do? It's magic!
2058 // TODO: set Chain = Op(0).getEntryNode()
// Final Newton-Raphson correction step: C + BRcpl * (A - B * C).
2060 return DAG.getNode(ISD::FADD, VT,
2061 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2062 DAG.getNode(ISD::FMUL, VT,
2063 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2064 DAG.getNode(ISD::FSUB, VT, A,
2065 DAG.getNode(ISD::FMUL, VT, B,
2066 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2069 // Expands double-precision FDIV
2070 // Expects two doubles as inputs X and Y, does a floating point
2071 // reciprocal estimate, and three iterations of Newton-Raphson
2072 // to increase accuracy.
2073 //static SDOperand LowerFDIVf64(SDOperand Op, SelectionDAG &DAG) {
2074 // MachineFunction &MF = DAG.getMachineFunction();
2075 // SSARegMap *RegMap = MF.getSSARegMap();
2077 // SDOperand X = Op.getOperand(0);
2078 // SDOperand Y = Op.getOperand(1);
// Lower EXTRACT_VECTOR_ELT: shuffle the requested element into the SPU
// "preferred slot" of the register, then use EXTRACT_ELT0 to read it out.
// Element 0 of i32/i64 is already in the preferred slot and needs no shuffle.
// NOTE(review): this listing is missing lines (the switch on VT that selects
// the preferred-slot byte range, and several closing braces); annotated as-is.
2081 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2082 unsigned VT = Op.getValueType();
2083 SDOperand N = Op.getOperand(0);
2084 SDOperand Elt = Op.getOperand(1);
2085 SDOperand ShufMask[16];
2086 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2088 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2090 int EltNo = (int) C->getValue();
// Bounds checks per element type (16 x i8, 8 x i16, 4 x i32, 2 x i64).
// NOTE(review): the i32 and i64 assert *messages* are off by one — the
// conditions reject slot > 3 and slot > 1 respectively, not "> 4"/"> 2".
2093 if (VT == MVT::i8 && EltNo >= 16)
2094 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2095 else if (VT == MVT::i16 && EltNo >= 8)
2096 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2097 else if (VT == MVT::i32 && EltNo >= 4)
2098 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2099 else if (VT == MVT::i64 && EltNo >= 2)
2100 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2102 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2103 // i32 and i64: Element 0 is the preferred slot
2104 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2107 // Need to generate shuffle mask and extract:
2108 int prefslot_begin = -1, prefslot_end = -1;
// Byte offset of the requested element within the 16-byte register.
2109 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte ranges (switch on VT is missing from this listing):
// i8 -> byte 3 only; i16 -> bytes 2-3; i32 -> bytes 0-3; i64 -> bytes 0-7.
2113 prefslot_begin = prefslot_end = 3;
2117 prefslot_begin = 2; prefslot_end = 3;
2121 prefslot_begin = 0; prefslot_end = 3;
2125 prefslot_begin = 0; prefslot_end = 7;
2130 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2131 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask: route the element's bytes into the
// preferred slot; bytes past the slot just repeat the pattern (don't-care).
2133 for (int i = 0; i < 16; ++i) {
2134 // zero fill upper part of preferred slot, don't care about the
2136 unsigned int mask_val;
2138 if (i <= prefslot_end) {
2140 ((i < prefslot_begin)
2142 : elt_byte + (i - prefslot_begin));
2144 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2146 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2149 SDOperand ShufMaskVec =
2150 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2152 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle the element into place, then extract from the preferred slot.
2154 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2155 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2156 N, N, ShufMaskVec));
// Lower INSERT_VECTOR_ELT: scalar-to-vector the new value, then SHUFB it
// into the target lane using an INSERT_MASK (cwd/chd/cbd-style control word)
// computed from the constant index.
// NOTE(review): this listing is missing lines (the tail of the SHUFB operand
// list and the final return); annotated as-is.
2160 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2161 SDOperand VecOp = Op.getOperand(0);
2162 SDOperand ValOp = Op.getOperand(1);
2163 SDOperand IdxOp = Op.getOperand(2);
2164 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> never returns null, so this assert is vacuous;
// dyn_cast<> would be the checked form.
2166 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2167 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2169 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2170 // Use $2 because it's always 16-byte aligned and it's available:
2171 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// SHUFB(scalar(ValOp), VecOp?, INSERT_MASK(R2 + index)) — the mask selects
// ValOp's bytes for the indexed lane and the original vector elsewhere.
2174 DAG.getNode(SPUISD::SHUFB, VT,
2175 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2177 DAG.getNode(SPUISD::INSERT_MASK, VT,
2178 DAG.getNode(ISD::ADD, PtrVT,
2180 DAG.getConstant(CN->getValue(),
// Lower i8 arithmetic that the SPU cannot do natively: promote both operands
// to i16, perform the operation at i16, and truncate the result back to i8.
// The extension kind (sign vs. zero) varies per operation. Constants are
// re-materialized directly at i16 instead of wrapping them in an extend node.
// NOTE(review): the case labels of the switch on Opc are missing from this
// listing, so which opcode each promote/truncate body belongs to is inferred
// from the extension kind and comments — confirm against the full source.
2186 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2187 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2189 assert(Op.getValueType() == MVT::i8);
2192 assert(0 && "Unhandled i8 math operator");
2196 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Both operands sign-extended: correct for SUB.
2198 SDOperand N1 = Op.getOperand(1);
2199 N0 = (N0.getOpcode() != ISD::Constant
2200 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2201 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2202 N1 = (N1.getOpcode() != ISD::Constant
2203 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2204 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2205 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2206 DAG.getNode(Opc, MVT::i16, N0, N1));
// Next case (label missing — presumably ROTL/ROTR): the byte is replicated
// into both halves of the i16 (N0 | (N0 << 8)) so a 16-bit rotate behaves
// like an 8-bit rotate; shift amount is zero-extended or truncated to i16.
2210 SDOperand N1 = Op.getOperand(1);
2212 N0 = (N0.getOpcode() != ISD::Constant
2213 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2214 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2215 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2216 N1 = (N1.getOpcode() != ISD::Constant
2217 ? DAG.getNode(N1Opc, MVT::i16, N1)
2218 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2219 SDOperand ExpandArg =
2220 DAG.getNode(ISD::OR, MVT::i16, N0,
2221 DAG.getNode(ISD::SHL, MVT::i16,
2222 N0, DAG.getConstant(8, MVT::i16)));
2223 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2224 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Next case (label missing — zero-extends the value): presumably a logical
// shift (SHL/SRL), where zero extension is the correct widening.
2228 SDOperand N1 = Op.getOperand(1);
2230 N0 = (N0.getOpcode() != ISD::Constant
2231 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2232 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2233 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2234 N1 = (N1.getOpcode() != ISD::Constant
2235 ? DAG.getNode(N1Opc, MVT::i16, N1)
2236 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2237 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2238 DAG.getNode(Opc, MVT::i16, N0, N1));
// Next case (label missing — sign-extends both operands): presumably SRA
// or a signed arithmetic op, where sign extension is required.
2241 SDOperand N1 = Op.getOperand(1);
2243 N0 = (N0.getOpcode() != ISD::Constant
2244 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2245 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2246 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2247 N1 = (N1.getOpcode() != ISD::Constant
2248 ? DAG.getNode(N1Opc, MVT::i16, N1)
2249 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2250 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2251 DAG.getNode(Opc, MVT::i16, N0, N1));
// Final case (label missing — presumably MUL): same sign-extend pattern;
// the low 8 bits of a 16-bit product equal the 8-bit product.
2254 SDOperand N1 = Op.getOperand(1);
2256 N0 = (N0.getOpcode() != ISD::Constant
2257 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2258 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2259 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2260 N1 = (N1.getOpcode() != ISD::Constant
2261 ? DAG.getNode(N1Opc, MVT::i16, N1)
2262 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2263 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2264 DAG.getNode(Opc, MVT::i16, N0, N1));
2272 //! Lower byte immediate operations for v16i8 vectors:
// Recognize AND/OR/XOR of a v16i8 vector with a splatted constant and
// rewrite the constant side as 16 byte-sized *target* constants so the
// selector can match the byte-immediate forms (ANDBI/ORBI/XORBI).
// The constant BUILD_VECTOR may be either operand, possibly behind a
// BIT_CONVERT; both arrangements are normalized below.
// NOTE(review): this listing is missing lines (the Arg/ConstVec
// declarations, SplatSize, several closing braces, and the fall-through
// return for the non-splat case); annotated as-is.
2274 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2277 MVT::ValueType VT = Op.getValueType();
// First guess: constant on the left, argument on the right.
2279 ConstVec = Op.getOperand(0);
2280 Arg = Op.getOperand(1);
2281 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2282 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2283 ConstVec = ConstVec.getOperand(0);
// Otherwise try the swapped arrangement: constant on the right.
2285 ConstVec = Op.getOperand(1);
2286 Arg = Op.getOperand(0);
2287 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2288 ConstVec = ConstVec.getOperand(0);
2293 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2294 uint64_t VectorBits[2];
2295 uint64_t UndefBits[2];
2296 uint64_t SplatBits, SplatUndef;
// Only rewrite when the whole vector is a single splatted element value.
2299 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2300 && isConstantSplat(VectorBits, UndefBits,
2301 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2302 SplatBits, SplatUndef, SplatSize)) {
2303 SDOperand tcVec[16];
// Low byte of the splat value becomes the byte immediate.
2304 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2305 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2307 // Turn the BUILD_VECTOR into a set of target constants:
2308 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same logical op with the target-constant vector operand.
2311 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2312 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2319 //! Lower i32 multiplication
// SPU has no 32x32 multiply; compose it from 16-bit multiplies:
//   rA * rB = MPYH(rA, rB) + MPYH(rB, rA) + MPYU(rA, rB)
// i.e. the two (high x low) cross products shifted into the high half,
// plus the unsigned (low x low) product.
// NOTE(review): this listing is missing lines (the rest of the signature,
// the switch on VT, and the abort path after the error message);
// annotated as-is.
2320 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// Unknown value types fall into this diagnostic path:
2324 cerr << "CellSPU: Unknown LowerMUL value type, got "
2325 << MVT::getValueTypeString(Op.getValueType())
2331 SDOperand rA = Op.getOperand(0);
2332 SDOperand rB = Op.getOperand(1);
2334 return DAG.getNode(ISD::ADD, MVT::i32,
2335 DAG.getNode(ISD::ADD, MVT::i32,
2336 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2337 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA),
2338 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2345 //! Custom lowering for CTPOP (count population)
2347 Custom lowering code that counts the number of ones in the input
2348 operand. SPU has such an instruction, but it counts the number of
2349 ones per byte, which then have to be accumulated.
// Strategy per width (the switch/case labels are missing from this listing):
//   i8:  promote to a vector, CNTB, extract byte 0 — no accumulation needed.
//   i16: CNTB, then add the two byte counts ((x >> 8) + x) & 0x0f.
//   i32: CNTB, then two shift-and-add rounds ((>>16, >>8)) & 0xff.
// Virtual registers (CNTB_reg, SUM1_reg) pin intermediate values so each
// CopyFromReg reads a consistent copy of the partial sums.
2351 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2352 unsigned VT = Op.getValueType();
// Vector type with 128/bits(VT) lanes, used to run CNTB across the register.
2353 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// --- i8 case: a single CNTB byte already is the answer.
2357 SDOperand N = Op.getOperand(0);
2358 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2360 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2361 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2363 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// --- i16 case: sum the two per-byte counts.
2367 MachineFunction &MF = DAG.getMachineFunction();
2368 SSARegMap *RegMap = MF.getSSARegMap();
2370 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R16CRegClass);
2372 SDOperand N = Op.getOperand(0);
2373 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
// Max popcount of 16 bits is 16, but the per-byte sums fit in 4 bits each
// before masking; 0x0f masks the final byte-pair sum.
2374 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2375 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2377 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2378 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2380 // CNTB_result becomes the chain to which all of the virtual registers
2381 // CNTB_reg, SUM1_reg become associated:
2382 SDOperand CNTB_result =
2383 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2385 SDOperand CNTB_rescopy =
2386 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2388 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
// ((counts >> 8) + counts) & 0x0f — fold the high byte into the low byte.
2390 return DAG.getNode(ISD::AND, MVT::i16,
2391 DAG.getNode(ISD::ADD, MVT::i16,
2392 DAG.getNode(ISD::SRL, MVT::i16,
// --- i32 case: two rounds of shift-and-add over the four byte counts.
2399 MachineFunction &MF = DAG.getMachineFunction();
2400 SSARegMap *RegMap = MF.getSSARegMap();
2402 unsigned CNTB_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2403 unsigned SUM1_reg = RegMap->createVirtualRegister(&SPU::R32CRegClass);
2405 SDOperand N = Op.getOperand(0);
2406 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2407 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2408 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2409 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2411 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2412 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2414 // CNTB_result becomes the chain to which all of the virtual registers
2415 // CNTB_reg, SUM1_reg become associated:
2416 SDOperand CNTB_result =
2417 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2419 SDOperand CNTB_rescopy =
2420 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// Round 1: fold the upper 16 bits of byte-counts onto the lower 16.
2423 DAG.getNode(ISD::SRL, MVT::i32,
2424 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2427 DAG.getNode(ISD::ADD, MVT::i32,
2428 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2430 SDOperand Sum1_rescopy =
2431 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Round 2: fold the remaining high byte onto the low byte.
2434 DAG.getNode(ISD::SRL, MVT::i32,
2435 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2438 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2439 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
// Only the low byte of the accumulated sum is the popcount.
2441 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2451 /// LowerOperation - Provide custom lowering hooks for some operations.
// Central dispatch for all opcodes this target marked "Custom". Each case
// forwards to the matching Lower* helper; the default path dumps the node
// to cerr for debugging (the abort after it is missing from this listing).
// NOTE(review): several case labels (LOAD/STORE/Constant/CALL/RET/the i8
// opcodes/AND-OR-XOR/MUL/FDIV/CTPOP) are missing lines in this listing.
2454 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2456 switch (Op.getOpcode()) {
// Unhandled opcodes: report loudly rather than miscompile silently.
2458 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2459 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2460 cerr << "*Op.Val:\n";
2467 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2469 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2470 case ISD::ConstantPool:
2471 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2472 case ISD::GlobalAddress:
2473 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2474 case ISD::JumpTable:
2475 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2477 return LowerConstant(Op, DAG);
2478 case ISD::ConstantFP:
2479 return LowerConstantFP(Op, DAG);
2480 case ISD::FORMAL_ARGUMENTS:
2481 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2483 return LowerCALL(Op, DAG);
2485 return LowerRET(Op, DAG, getTargetMachine());
// i8 arithmetic ops (labels missing) funnel into LowerI8Math:
2494 return LowerI8Math(Op, DAG, Op.getOpcode());
2496 // Vector-related lowering.
2497 case ISD::BUILD_VECTOR:
2498 return LowerBUILD_VECTOR(Op, DAG);
2499 case ISD::SCALAR_TO_VECTOR:
2500 return LowerSCALAR_TO_VECTOR(Op, DAG);
2501 case ISD::VECTOR_SHUFFLE:
2502 return LowerVECTOR_SHUFFLE(Op, DAG);
2503 case ISD::EXTRACT_VECTOR_ELT:
2504 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2505 case ISD::INSERT_VECTOR_ELT:
2506 return LowerINSERT_VECTOR_ELT(Op, DAG);
2508 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2512 return LowerByteImmed(Op, DAG);
2514 // Vector and i8 multiply:
2516 if (MVT::isVector(Op.getValueType()))
2517 return LowerVectorMUL(Op, DAG);
2518 else if (Op.getValueType() == MVT::i8)
2519 return LowerI8Math(Op, DAG, Op.getOpcode());
2521 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only f32/v4f32 are supported; f64 lowering is still disabled.
2524 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2525 return LowerFDIVf32(Op, DAG);
2526 // else if (Op.getValueType() == MVT::f64)
2527 // return LowerFDIVf64(Op, DAG);
2529 assert(0 && "Calling FDIV on unsupported MVT");
2532 return LowerCTPOP(Op, DAG);
2538 //===----------------------------------------------------------------------===//
2539 // Other Lowering Code
2540 //===----------------------------------------------------------------------===//
// TargetLowering hook for custom MachineInstr insertion at the end of a
// basic block. NOTE(review): the return type and entire body are missing
// from this listing — presumably a stub; confirm against the full source.
2543 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2544 MachineBasicBlock *BB)
2549 //===----------------------------------------------------------------------===//
2550 // Target Optimization Hooks
2551 //===----------------------------------------------------------------------===//
// Target-specific DAG combines. The only visible case handles the family of
// SPU immediate shift/rotate nodes, simplifying 0 << V -> 0 and V << 0 -> V.
// NOTE(review): this listing is missing lines (the return statements inside
// the constant checks, some case labels, and the default return);
// annotated as-is.
2554 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2557 TargetMachine &TM = getTargetMachine();
2558 SelectionDAG &DAG = DCI.DAG;
2560 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2562 switch (N->getOpcode()) {
2565 // Look for obvious optimizations for shift left:
2566 // a) Replace 0 << V with 0
2567 // b) Replace V << 0 with V
2569 // N.B: llvm will generate an undef node if the shift amount is greater than
2570 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2573 case SPU::SHLQBIIvec:
2575 case SPU::ROTHIr16_i32:
2577 case SPU::ROTIr32_i16:
2578 case SPU::ROTQBYIvec:
2579 case SPU::ROTQBYBIvec:
2580 case SPU::ROTQBIIvec:
2581 case SPU::ROTHMIr16:
2583 case SPU::ROTQMBYIvec: {
// NOTE(review): cast<> never returns null, so these 'if (C = cast<...>)'
// guards are vacuous; dyn_cast<> would be the checked form. The outer
// getOpcode() == ISD::Constant test is what actually protects the cast.
2584 if (N0.getOpcode() == ISD::Constant) {
2585 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2586 if (C->getValue() == 0) // 0 << V -> 0.
2590 SDOperand N1 = N->getOperand(1);
2591 if (N1.getOpcode() == ISD::Constant) {
2592 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2593 if (C->getValue() == 0) // V << 0 -> V
2604 //===----------------------------------------------------------------------===//
2605 // Inline Assembly Support
2606 //===----------------------------------------------------------------------===//
2608 /// getConstraintType - Given a constraint letter, return the type of
2609 /// constraint it is for this target.
// Single-letter register-class constraints map to C_RegisterClass; anything
// else defers to the TargetLowering default. NOTE(review): the specific
// case letters (presumably b/r/f/d/v per getRegForInlineAsmConstraint
// below) are missing from this listing.
2610 SPUTargetLowering::ConstraintType
2611 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2612 if (ConstraintLetter.size() == 1) {
2613 switch (ConstraintLetter[0]) {
2620 return C_RegisterClass;
2623 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (plus the operand's value type)
// to a concrete SPU register class; unknown constraints defer to the base
// class. NOTE(review): the case labels and some if-conditions are missing
// from this listing; the pairing of letters to classes below is inferred —
// confirm against the full source.
2626 std::pair<unsigned, const TargetRegisterClass*>
2627 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2628 MVT::ValueType VT) const
2630 if (Constraint.size() == 1) {
2631 // GCC RS6000 Constraint Letters
2632 switch (Constraint[0]) {
// 64-bit vs 32-bit integer register classes (condition on VT missing):
2636 return std::make_pair(0U, SPU::R64CRegisterClass);
2637 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point register classes by width:
2640 return std::make_pair(0U, SPU::R32FPRegisterClass);
2641 else if (VT == MVT::f64)
2642 return std::make_pair(0U, SPU::R64FPRegisterClass);
// General-purpose fallback class:
2645 return std::make_pair(0U, SPU::GPRCRegisterClass);
2649 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Known-bits analysis hook for SPU-specific nodes. NOTE(review): the return
// type, the Mask/KnownOne parameters, and the entire body are missing from
// this listing — presumably an empty/default implementation; confirm
// against the full source.
2653 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2655 uint64_t &KnownZero,
2657 const SelectionDAG &DAG,
2658 unsigned Depth ) const {
2663 // LowerAsmOperandForConstraint
// Lower an inline-asm operand for a given constraint letter; no SPU-specific
// handling yet, so everything is delegated to the TargetLowering base class.
// NOTE(review): the return type line and closing brace are missing from
// this listing.
2665 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2666 char ConstraintLetter,
2667 std::vector<SDOperand> &Ops,
2668 SelectionDAG &DAG) {
2669 // Default, for the time being, to the base class handler
2670 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2673 /// isLegalAddressImmediate - Return true if the integer value can be used
2674 /// as the offset of the target addressing mode.
// Ty is unused: the legality of the offset does not depend on the accessed
// type here. NOTE(review): the accepted interval is open on both ends,
// (-2^18, 2^18 - 1) exclusive — i.e. V == -262144 and V == 262143 are both
// rejected; confirm this asymmetry is intentional rather than an off-by-one.
2675 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2676 // SPU's addresses are 256K:
2677 return (V > -(1 << 18) && V < (1 << 18) - 1);
2680 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {