1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// NOTE(review): lazily populated on first call to getTargetNodeName(); a
// plain mutable global, so first-call initialization is presumably not
// thread-safe -- confirm whether that matters for this build.
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
// Each entry pairs a value type with the byte offset of that type's
// "preferred slot" within a 16-byte SPU quadword register.
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
// Per-type lookup table (initializer elided in this view) and its entry
// count via the classic sizeof-array idiom.
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear-scan lookup of VT in valtype_map; the table is small, so O(n) is
// fine. Returns the matching entry, or 0 (see retval init) when VT has no
// entry -- the cerr below reports that case for debugging.
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// Diagnostic for an unmapped value type (guard/abort code elided in this
// view -- presumably NDEBUG-conditional; confirm against full source).
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
// Recognizes both the generic address nodes and their Target* lowered
// counterparts, plus the SPU-specific A-form address node.
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
// An indirect target is either a plain register or the result of an SPU
// load (SPUISD::LDRESULT) -- i.e., an address not known at compile time.
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
// Constructor: registers the SPU register classes and declares, for every
// ISD operation/value-type pair, whether it is Legal, Custom-lowered,
// Promoted, or Expanded on Cell SPU. Finishes by computing derived
// register properties.
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 // NOTE: i8 register class is not registered because we cannot determine when
126 // we need to zero or sign extend for custom-lowered loads and stores.
127 // NOTE: Ignore the previous note. For now. :-)
128 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
129 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
130 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
131 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
132 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
133 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
134 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
140 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
142 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
143 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
144 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
146 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
147 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
148 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
149 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
151 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
152 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
153 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
155 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
156 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
157 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
159 // SPU constant load actions are custom lowered:
160 setOperationAction(ISD::Constant, MVT::i64, Custom);
161 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
162 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
164 // SPU's loads and stores have to be custom lowered:
165 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
167 setOperationAction(ISD::LOAD, sctype, Custom);
168 setOperationAction(ISD::STORE, sctype, Custom);
171 // Custom lower BRCOND for i1, i8 to "promote" the result to
172 // i32 and i16, respectively.
173 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175 // Expand the jumptable branches
176 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
177 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
180 // SPU has no intrinsics for these particular operations:
181 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
182 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
183 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
184 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
186 // SPU has no SREM/UREM instructions (comment previously said "PowerPC" --
186 // a copy-paste from the PPC backend).
187 setOperationAction(ISD::SREM, MVT::i32, Expand);
188 setOperationAction(ISD::UREM, MVT::i32, Expand);
189 setOperationAction(ISD::SREM, MVT::i64, Expand);
190 setOperationAction(ISD::UREM, MVT::i64, Expand);
192 // We don't support sin/cos/sqrt/fmod
193 setOperationAction(ISD::FSIN , MVT::f64, Expand);
194 setOperationAction(ISD::FCOS , MVT::f64, Expand);
195 setOperationAction(ISD::FREM , MVT::f64, Expand);
196 setOperationAction(ISD::FSIN , MVT::f32, Expand);
197 setOperationAction(ISD::FCOS , MVT::f32, Expand);
198 setOperationAction(ISD::FREM , MVT::f32, Expand);
200 // If we're enabling GP optimizations, use hardware square root
201 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
202 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
205 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
207 // SPU can do rotate right and left, so legalize it... but customize for i8
208 // because instructions don't exist.
209 setOperationAction(ISD::ROTR, MVT::i32, Legal);
210 setOperationAction(ISD::ROTR, MVT::i16, Legal);
211 setOperationAction(ISD::ROTR, MVT::i8, Custom);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
220 // Custom lower i32 multiplications
221 setOperationAction(ISD::MUL, MVT::i32, Custom);
223 // Need to custom handle (some) common i8 math ops
224 setOperationAction(ISD::SUB, MVT::i8, Custom);
225 setOperationAction(ISD::MUL, MVT::i8, Custom);
227 // SPU does not have BSWAP, but it does support CTLZ for i32.
228 // CTPOP has to be custom lowered.
229 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
230 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
232 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
233 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
234 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
235 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
237 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
238 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
240 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
242 // SPU does not have select or setcc
243 setOperationAction(ISD::SELECT, MVT::i1, Expand);
244 setOperationAction(ISD::SELECT, MVT::i8, Expand);
245 setOperationAction(ISD::SELECT, MVT::i16, Expand);
246 setOperationAction(ISD::SELECT, MVT::i32, Expand);
247 setOperationAction(ISD::SELECT, MVT::i64, Expand);
248 setOperationAction(ISD::SELECT, MVT::f32, Expand);
249 setOperationAction(ISD::SELECT, MVT::f64, Expand);
251 setOperationAction(ISD::SETCC, MVT::i1, Expand);
252 setOperationAction(ISD::SETCC, MVT::i8, Expand);
253 setOperationAction(ISD::SETCC, MVT::i16, Expand);
254 setOperationAction(ISD::SETCC, MVT::i32, Expand);
255 setOperationAction(ISD::SETCC, MVT::i64, Expand);
256 setOperationAction(ISD::SETCC, MVT::f32, Expand);
257 setOperationAction(ISD::SETCC, MVT::f64, Expand);
259 // SPU has a legal FP -> signed INT instruction
260 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
261 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
262 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
263 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
265 // FDIV on SPU requires custom lowering
266 setOperationAction(ISD::FDIV, MVT::f32, Custom);
267 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
269 // SPU has [U|S]INT_TO_FP
270 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
271 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
272 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
273 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
274 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
275 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
276 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
277 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
279 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
280 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
281 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
282 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
284 // We cannot sextinreg(i1). Expand to shifts.
285 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
287 // Support label based line numbers.
288 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
289 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
291 // We want to legalize GlobalAddress and ConstantPool nodes into the
292 // appropriate instructions to materialize the address.
293 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
295 setOperationAction(ISD::GlobalAddress, sctype, Custom);
296 setOperationAction(ISD::ConstantPool, sctype, Custom);
297 setOperationAction(ISD::JumpTable, sctype, Custom);
300 // RET must be custom lowered, to meet ABI requirements
301 setOperationAction(ISD::RET, MVT::Other, Custom);
303 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
304 setOperationAction(ISD::VASTART , MVT::Other, Custom);
306 // Use the default implementation.
307 setOperationAction(ISD::VAARG , MVT::Other, Expand);
308 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
309 setOperationAction(ISD::VAEND , MVT::Other, Expand);
310 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
311 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
312 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
313 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
315 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): FP_TO_SINT/SINT_TO_FP for i64 were already set to Custom
// above; these repeated calls are redundant but harmless (last call wins).
316 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
317 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
319 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier "Legal" setting for
// FP_TO_UINT/i32 -- presumably intentional; confirm.
320 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
322 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
323 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
325 // First set operation action for all vector types to expand. Then we
326 // will selectively turn on ones that can be effectively codegen'd.
327 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
328 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
329 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
330 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
331 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
332 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
334 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
335 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
336 // add/sub are legal for all supported vector VT's.
337 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
338 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
339 // mul has to be custom lowered.
340 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
342 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
343 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
344 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
345 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
346 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
347 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
349 // These operations need to be expanded:
350 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
351 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
352 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
353 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
354 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
356 // Custom lower build_vector, constant pool spills, insert and
357 // extract vector elements:
358 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
359 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
360 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
361 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
362 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
363 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8 needs special handling beyond the generic vector loop above.
366 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
367 setOperationAction(ISD::AND, MVT::v16i8, Custom);
368 setOperationAction(ISD::OR, MVT::v16i8, Custom);
369 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
370 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
372 setSetCCResultType(MVT::i32);
373 setShiftAmountType(MVT::i32);
374 setSetCCResultContents(ZeroOrOneSetCCResult);
376 setStackPointerRegisterToSaveRestore(SPU::R1);
378 // We have target-specific dag combine patterns for the following nodes:
379 setTargetDAGCombine(ISD::ADD);
381 computeRegisterProperties();
// Returns the printable name of an SPU-specific DAG opcode, or 0 if the
// opcode is unknown. The name table is built lazily on the first call
// (see node_names.empty() check below).
385 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
387 if (node_names.empty()) {
388 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
389 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
390 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
391 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
392 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
393 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
394 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
395 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
396 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
397 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
398 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
399 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
400 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
401 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
402 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
403 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
404 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
405 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
406 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
407 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
408 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
409 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
410 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
411 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
412 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
413 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
414 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
415 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
416 "SPUISD::ROTBYTES_RIGHT_Z";
417 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
418 "SPUISD::ROTBYTES_RIGHT_S";
419 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
420 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
421 "SPUISD::ROTBYTES_LEFT_CHAINED";
422 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
423 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
424 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
425 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
426 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup; unknown opcodes yield a null pointer rather than aborting.
429 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
431 return ((i != node_names.end()) ? i->second : 0);
434 //===----------------------------------------------------------------------===//
435 // Calling convention code:
436 //===----------------------------------------------------------------------===//
438 #include "SPUGenCallingConv.inc"
440 //===----------------------------------------------------------------------===//
441 // LowerOperation implementation
442 //===----------------------------------------------------------------------===//
444 /// Aligned load common code for CellSPU
446 \param[in] Op The SelectionDAG load or store operand
447 \param[in] DAG The selection DAG
448 \param[in] ST CellSPU subtarget information structure
449 \param[in,out] alignment Caller initializes this to the load or store node's
450 value from getAlignment(), may be updated while generating the aligned load
451 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
452 offset (divisible by 16, modulo 16 == 0)
453 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
454 offset of the preferred slot (modulo 16 != 0)
455 \param[in,out] VT Caller initializes this value type to the load or store
456 node's loaded or stored value type; may be updated if an i1-extended load or
458 \param[out] was16aligned true if the base pointer had 16-byte alignment,
459 otherwise false. Can help to determine if the chunk needs to be rotated.
461 Both load and store lowering load a block of data aligned on a 16-byte
462 boundary. This is the common aligned load code shared between both.
465 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
467 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
468 MVT::ValueType &VT, bool &was16aligned)
470 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
471 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
472 SDOperand basePtr = LSN->getBasePtr();
473 SDOperand chain = LSN->getChain();
// base + constant offset: fold the constant into alignOffs/prefSlotOffs
// and strip the ADD so we address from the underlying base.
475 if (basePtr.getOpcode() == ISD::ADD) {
476 SDOperand Op1 = basePtr.Val->getOperand(1);
478 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
479 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
481 alignOffs = (int) CN->getValue();
482 prefSlotOffs = (int) (alignOffs & 0xf);
484 // Adjust the rotation amount to ensure that the final result ends up in
485 // the preferred slot:
486 prefSlotOffs -= vtm->prefslot_byte;
487 basePtr = basePtr.getOperand(0);
489 // Loading from memory, can we adjust alignment?
490 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
491 SDOperand APtr = basePtr.getOperand(0);
492 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
493 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
494 alignment = GSDN->getGlobal()->getAlignment();
// No constant offset available: rotation is just the negated preferred
// slot byte for this value type.
499 prefSlotOffs = -vtm->prefslot_byte;
503 prefSlotOffs = -vtm->prefslot_byte;
// 16-byte aligned case: re-anchor the pointer on a 16-byte boundary
// (D-form) and emit a single v16i8 quadword load.
506 if (alignment == 16) {
507 // Realign the base pointer as a D-Form address:
508 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
509 basePtr = DAG.getNode(ISD::ADD, PtrVT,
511 DAG.getConstant((alignOffs & ~0xf), PtrVT));
514 // Emit the vector load:
516 return DAG.getLoad(MVT::v16i8, chain, basePtr,
517 LSN->getSrcValue(), LSN->getSrcValueOffset(),
518 LSN->isVolatile(), 16);
521 // Unaligned load or we're using the "large memory" model, which means that
522 // we have to be very pessimistic:
523 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
524 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
528 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
529 DAG.getConstant((alignOffs & ~0xf), PtrVT));
// Caller must rotate the chunk: report that the base was not 16-aligned.
530 was16aligned = false;
531 return DAG.getLoad(MVT::v16i8, chain, basePtr,
532 LSN->getSrcValue(), LSN->getSrcValueOffset(),
533 LSN->isVolatile(), 16);
536 /// Custom lower loads for CellSPU
538 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
539 within a 16-byte block, we have to rotate to extract the requested element.
542 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
543 LoadSDNode *LN = cast<LoadSDNode>(Op);
544 SDOperand the_chain = LN->getChain();
545 MVT::ValueType VT = LN->getMemoryVT();
546 MVT::ValueType OpVT = Op.Val->getValueType(0);
547 ISD::LoadExtType ExtType = LN->getExtensionType();
548 unsigned alignment = LN->getAlignment();
551 switch (LN->getAddressingMode()) {
552 case ISD::UNINDEXED: {
// Load the whole 16-byte chunk containing the requested value.
556 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
// Thread the chain through every node we create so ordering is kept.
561 the_chain = result.getValue(1);
562 // Rotate the chunk if necessary
565 if (rotamt != 0 || !was16aligned) {
566 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// 16-aligned: rotate by the constant byte amount computed above...
571 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
// ...otherwise rotate by basePtr+rotamt, computed at run time.
573 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
574 LoadSDNode *LN1 = cast<LoadSDNode>(result);
575 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
576 DAG.getConstant(rotamt, PtrVT));
579 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
580 the_chain = result.getValue(1);
583 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
585 MVT::ValueType vecVT = MVT::v16i8;
587 // Convert the loaded v16i8 vector to the appropriate vector type
588 // specified by the operand:
591 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
593 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
// Extract element 0 (the preferred slot) of the rotated vector as the
// scalar result, keeping the chain.
596 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
597 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
598 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
599 the_chain = result.getValue(1);
601 // Handle the sign and zero-extending loads for i1 and i8:
604 if (ExtType == ISD::SEXTLOAD) {
605 NewOpC = (OpVT == MVT::i1
606 ? SPUISD::EXTRACT_I1_SEXT
607 : SPUISD::EXTRACT_I8_SEXT);
609 assert(ExtType == ISD::ZEXTLOAD);
610 NewOpC = (OpVT == MVT::i1
611 ? SPUISD::EXTRACT_I1_ZEXT
612 : SPUISD::EXTRACT_I8_ZEXT);
615 result = DAG.getNode(NewOpC, OpVT, result);
// Wrap value+chain in LDRESULT so the custom load is a single node with
// both results.
618 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
619 SDOperand retops[2] = {
624 result = DAG.getNode(SPUISD::LDRESULT, retvts,
625 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed addressing modes are not supported here.
632 case ISD::LAST_INDEXED_MODE:
633 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
635 cerr << (unsigned) LN->getAddressingMode() << "\n";
643 /// Custom lower stores for CellSPU
645 All CellSPU stores are aligned to 16-byte boundaries, so for elements
646 within a 16-byte block, we have to generate a shuffle to insert the
647 requested element into its place, then store the resulting block.
650 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
651 StoreSDNode *SN = cast<StoreSDNode>(Op);
652 SDOperand Value = SN->getValue();
653 MVT::ValueType VT = Value.getValueType();
// For truncating stores, the in-memory type differs from the value type.
654 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
655 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
656 unsigned alignment = SN->getAlignment();
658 switch (SN->getAddressingMode()) {
659 case ISD::UNINDEXED: {
660 int chunk_offset, slot_offset;
663 // The vector type we really want to load from the 16-byte chunk, except
664 // in the case of MVT::i1, which has to be v16i8.
665 unsigned vecVT, stVecVT = MVT::v16i8;
668 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
669 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// Read-modify-write: load the containing 16-byte chunk first.
671 SDOperand alignLoadVec =
672 AlignedLoad(Op, DAG, ST, SN, alignment,
673 chunk_offset, slot_offset, VT, was16aligned);
675 if (alignLoadVec.Val == 0)
678 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
679 SDOperand basePtr = LN->getBasePtr();
680 SDOperand the_chain = alignLoadVec.getValue(1);
681 SDOperand theValue = SN->getValue();
685 && (theValue.getOpcode() == ISD::AssertZext
686 || theValue.getOpcode() == ISD::AssertSext)) {
687 // Drill down and get the value for zero- and sign-extended
689 theValue = theValue.getOperand(0);
694 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
695 SDOperand insertEltPtr;
696 SDOperand insertEltOp;
698 // If the base pointer is already a D-form address, then just create
699 // a new D-form address with a slot offset and the original base pointer.
700 // Otherwise generate a D-form address with the slot offset relative
701 // to the stack pointer, which is always aligned.
702 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
703 DEBUG(basePtr.Val->dump(&DAG));
706 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
707 (basePtr.getOpcode() == ISD::ADD
708 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
709 insertEltPtr = basePtr;
712 // $sp is always aligned, so use it when necessary to avoid loading
715 basePtr.Val->hasOneUse() ? DAG.getRegister(SPU::R1, PtrVT) : basePtr;
716 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, ptrP, insertEltOffs);
718 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
// Build the insertion mask and shuffle the scalar into its slot within
// the loaded chunk, then store the whole chunk back.
722 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
723 result = DAG.getNode(SPUISD::SHUFB, vecVT,
724 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
726 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
728 result = DAG.getStore(the_chain, result, basePtr,
729 LN->getSrcValue(), LN->getSrcValueOffset(),
730 LN->isVolatile(), LN->getAlignment());
739 case ISD::LAST_INDEXED_MODE:
// Fixed copy-paste bug: this diagnostic previously claimed
// "LowerLOAD ... LoadSDNode" even though it reports an unsupported
// StoreSDNode addressing mode in LowerSTORE.
740 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
742 cerr << (unsigned) SN->getAddressingMode() << "\n";
750 /// Generate the address of a constant pool entry.
// Static relocation + small memory model -> A-form address; static +
// large memory model -> Hi/Lo pair added together, or an indirect
// address. Non-static relocation models are rejected (assert below).
752 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
753 MVT::ValueType PtrVT = Op.getValueType();
754 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
755 Constant *C = CP->getConstVal();
756 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
757 SDOperand Zero = DAG.getConstant(0, PtrVT);
758 const TargetMachine &TM = DAG.getTarget();
760 if (TM.getRelocationModel() == Reloc::Static) {
761 if (!ST->usingLargeMem()) {
762 // Just return the SDOperand with the constant pool address in it.
763 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
766 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
767 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
769 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
771 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, CPI, Zero);
777 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry. Only the static relocation
// model is supported: A-form address for small memory, wrapped in an
// indirect address for the large-memory model.
782 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
783 MVT::ValueType PtrVT = Op.getValueType();
784 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
785 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
786 SDOperand Zero = DAG.getConstant(0, PtrVT);
787 const TargetMachine &TM = DAG.getTarget();
789 if (TM.getRelocationModel() == Reloc::Static) {
790 SDOperand JmpAForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
791 return (!ST->usingLargeMem()
793 : DAG.getNode(SPUISD::IndirectAddr, PtrVT, JmpAForm, Zero));
797 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global. Mirrors LowerConstantPool/
// LowerJumpTable: A-form for static + small memory, Hi/Lo indirect
// address for static + large memory; other relocation models error out.
802 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
803 MVT::ValueType PtrVT = Op.getValueType();
804 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
805 GlobalValue *GV = GSDN->getGlobal();
806 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
807 const TargetMachine &TM = DAG.getTarget();
808 SDOperand Zero = DAG.getConstant(0, PtrVT);
810 if (TM.getRelocationModel() == Reloc::Static) {
811 if (!ST->usingLargeMem()) {
812 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
814 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
815 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
816 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
819 cerr << "LowerGlobalAddress: Relocation model other than static not "
828 //! Custom lower i64 integer constants
830 This code inserts all of the necessary juggling that needs to occur to load
831 a 64-bit constant into a register.
834 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
835 unsigned VT = Op.getValueType();
836 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
838 if (VT == MVT::i64) {
// Materialize via a splatted v2i64 build_vector, then extract the
// preferred slot (element 0).
839 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
840 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
841 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Any other constant type reaching this custom hook is unexpected.
844 cerr << "LowerConstant: unhandled constant type "
845 << MVT::getValueTypeString(VT)
854 //! Custom lower double precision floating point constants
// f64 constants are materialized by bit-casting the result of the i64
// constant lowering path (LowerConstant) back to f64.
856 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
857 unsigned VT = Op.getValueType();
858 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
861 "LowerConstantFP: Node is not ConstantFPSDNode");
863 if (VT == MVT::f64) {
864 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
865 return DAG.getNode(ISD::BIT_CONVERT, VT,
866 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
872 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
874 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
876 SDOperand Cond = Op.getOperand(1);
877 MVT::ValueType CondVT = Cond.getValueType();
878 MVT::ValueType CondNVT;
// i1 -> i32, i8 -> i16; the condition is zero-extended so the branch
// tests a type the SPU can compare natively.
880 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
881 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
882 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
884 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
// Other condition types pass through unmodified.
887 return SDOperand(); // Unchanged
// Lower ISD::FORMAL_ARGUMENTS for SPU: assign incoming fixed arguments to
// the SPU argument registers (one stack-slot-sized quadword each), loading
// from the caller's frame once registers are exhausted, and spill remaining
// argument registers for varargs so va_arg can walk them.
891 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
893 MachineFunction &MF = DAG.getMachineFunction();
894 MachineFrameInfo *MFI = MF.getFrameInfo();
895 MachineRegisterInfo &RegInfo = MF.getRegInfo();
896 SmallVector<SDOperand, 8> ArgValues;
897 SDOperand Root = Op.getOperand(0);
898 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
900 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
901 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// First argument slot sits just above the minimal linkage area ([LR]/[SP]).
903 unsigned ArgOffset = SPUFrameInfo::minStackSize();
904 unsigned ArgRegIdx = 0;
905 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
907 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
909 // Add DAG nodes to load the arguments or copy them out of registers.
910 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
912 bool needsLoad = false;
913 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
914 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
918 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
919 << MVT::getValueTypeString(ObjectVT)
// NOTE(review): fixed arguments are placed in registers only when the
// function is NOT vararg; vararg functions appear to take every argument
// from its stack slot — confirm this matches the intended SPU ABI.
924 if (!isVarArg && ArgRegIdx < NumArgRegs) {
925 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
926 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
927 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
934 if (!isVarArg && ArgRegIdx < NumArgRegs) {
935 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
936 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
937 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
944 if (!isVarArg && ArgRegIdx < NumArgRegs) {
945 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
946 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
947 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
954 if (!isVarArg && ArgRegIdx < NumArgRegs) {
955 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
956 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
957 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
964 if (!isVarArg && ArgRegIdx < NumArgRegs) {
965 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
966 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
967 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
974 if (!isVarArg && ArgRegIdx < NumArgRegs) {
975 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
976 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
977 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments stay in the full 128-bit vector register class.
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
999 // We need to load the argument to a virtual register if we determined above
1000 // that we ran out of physical registers of the appropriate type
1002 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1003 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1004 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
// Each argument occupies a full stack slot regardless of its size.
1005 ArgOffset += StackSlotSize;
1008 ArgValues.push_back(ArgVal);
1011 // If the function takes variable number of arguments, make a frame index for
1012 // the start of the first vararg value... for expansion of llvm.va_start.
1014 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1016 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1017 // If this function is vararg, store any remaining integer argument regs to
1018 // their spots on the stack so that they may be loaded by dereferencing the
1019 // result of va_next.
1020 SmallVector<SDOperand, 8> MemOps;
1021 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1022 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1023 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1024 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1025 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1026 MemOps.push_back(Store);
1027 // Increment the address by one pointer width for the next store.
1028 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1029 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all vararg spill stores into the chain so they are not dropped.
1031 if (!MemOps.empty())
1032 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1035 ArgValues.push_back(Root);
1037 // Return the new list of results.
1038 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1039 Op.Val->value_end());
1040 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1043 /// isLSAAddress - Return the immediate to use if the specified
1044 /// value is representable as a LSA address.
// Returns null unless Op is a constant that is word-aligned and fits in the
// sign-extended 18-bit local-store-address immediate field; on success the
// returned constant is the address divided by 4 (word index).
1045 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1046 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1049 int Addr = C->getValue();
1050 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
// NOTE(review): `Addr << 14 >> 14` sign-extends the low 18 bits, but left-
// shifting a negative signed int is implementation-defined/UB — confirm.
1051 (Addr << 14 >> 14) != Addr)
1052 return 0; // Top 14 bits have to be sext of immediate.
1054 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL for SPU: marshal outgoing arguments into the SPU argument
// registers (overflow goes to caller stack slots), pick the branch form for
// the callee (PC-relative BRSL, absolute BRASL, or indirect in large-memory
// mode), emit the SPUISD::CALL node, and copy return values out of R3/R4.
1059 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1060 SDOperand Chain = Op.getOperand(0);
1062 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1063 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1065 SDOperand Callee = Op.getOperand(4);
// Operands 5.. come in (value, flag) pairs, hence the divide by 2.
1066 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1067 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1068 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1069 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1071 // Handy pointer type
1072 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1074 // Accumulate how many bytes are to be pushed on the stack, including the
1075 // linkage area, and parameter passing area. According to the SPU ABI,
1076 // we minimally need space for [LR] and [SP]
1077 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1079 // Set up a copy of the stack pointer for use loading and storing any
1080 // arguments that may not fit in the registers available for argument
1082 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1084 // Figure out which arguments are going to go in registers, and which in
1086 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1087 unsigned ArgRegIdx = 0;
1089 // Keep track of registers passing arguments
1090 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1091 // And the arguments passed on the stack
1092 SmallVector<SDOperand, 8> MemOpChains;
1094 for (unsigned i = 0; i != NumOps; ++i) {
1095 SDOperand Arg = Op.getOperand(5+2*i);
1097 // PtrOff will be used to store the current argument to the stack if a
1098 // register cannot be found for it.
1099 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1100 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1102 switch (Arg.getValueType()) {
1103 default: assert(0 && "Unexpected ValueType for argument!");
// Register if one remains, otherwise a full stack slot per argument.
1107 if (ArgRegIdx != NumArgRegs) {
1108 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1110 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1111 ArgOffset += StackSlotSize;
1116 if (ArgRegIdx != NumArgRegs) {
1117 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1119 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1120 ArgOffset += StackSlotSize;
1127 if (ArgRegIdx != NumArgRegs) {
1128 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1130 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1131 ArgOffset += StackSlotSize;
1137 // Update number of stack bytes actually used, insert a call sequence start
1138 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1139 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1141 if (!MemOpChains.empty()) {
1142 // Adjust the stack pointer for the stack arguments.
1143 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1144 &MemOpChains[0], MemOpChains.size());
1147 // Build a sequence of copy-to-reg nodes chained together with token chain
1148 // and flag operands which copy the outgoing args into the appropriate regs.
1150 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1151 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1153 InFlag = Chain.getValue(1);
1156 std::vector<MVT::ValueType> NodeTys;
1157 NodeTys.push_back(MVT::Other); // Returns a chain
1158 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1160 SmallVector<SDOperand, 8> Ops;
1161 unsigned CallOpc = SPUISD::CALL;
1163 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1164 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1165 // node so that legalize doesn't hack it.
1166 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1167 GlobalValue *GV = G->getGlobal();
1168 unsigned CalleeVT = Callee.getValueType();
1169 SDOperand Zero = DAG.getConstant(0, PtrVT);
1170 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1172 if (!ST->usingLargeMem()) {
1173 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1174 // style calls, otherwise, external symbols are BRASL calls. This assumes
1175 // that declared/defined symbols are in the same compilation unit and can
1176 // be reached through PC-relative jumps.
1179 // This may be an unsafe assumption for JIT and really large compilation
1181 if (GV->isDeclaration()) {
1182 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1184 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1187 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1189 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1191 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1192 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1193 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1194 // If this is an absolute destination address that appears to be a legal
1195 // local store address, use the munged value.
1196 Callee = SDOperand(Dest, 0);
1199 Ops.push_back(Chain);
1200 Ops.push_back(Callee);
1202 // Add argument registers to the end of the list so that they are known live
1204 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1205 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1206 RegsToPass[i].second.getValueType()));
1209 Ops.push_back(InFlag);
1210 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1211 InFlag = Chain.getValue(1);
1213 Chain = DAG.getCALLSEQ_END(Chain,
1214 DAG.getConstant(NumStackBytes, PtrVT),
1215 DAG.getConstant(0, PtrVT),
1217 if (Op.Val->getValueType(0) != MVT::Other)
1218 InFlag = Chain.getValue(1);
1220 SDOperand ResultVals[3];
1221 unsigned NumResults = 0;
1224 // If the call has results, copy the values out of the ret val registers.
1225 switch (Op.Val->getValueType(0)) {
1226 default: assert(0 && "Unexpected ret value!");
1227 case MVT::Other: break;
// A pair of i32 results (e.g. an expanded i64) comes back in R4:R3.
1229 if (Op.Val->getValueType(1) == MVT::i32) {
1230 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1231 ResultVals[0] = Chain.getValue(0);
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1233 Chain.getValue(2)).getValue(1);
1234 ResultVals[1] = Chain.getValue(0);
1236 NodeTys.push_back(MVT::i32);
1238 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1239 ResultVals[0] = Chain.getValue(0);
1242 NodeTys.push_back(MVT::i32);
1245 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1246 ResultVals[0] = Chain.getValue(0);
1248 NodeTys.push_back(MVT::i64);
1252 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1253 InFlag).getValue(1);
1254 ResultVals[0] = Chain.getValue(0);
1256 NodeTys.push_back(Op.Val->getValueType(0));
1263 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1264 InFlag).getValue(1);
1265 ResultVals[0] = Chain.getValue(0);
1267 NodeTys.push_back(Op.Val->getValueType(0));
1271 NodeTys.push_back(MVT::Other);
1273 // If the function returns void, just return the chain.
1274 if (NumResults == 0)
1277 // Otherwise, merge everything together with a MERGE_VALUES node.
1278 ResultVals[NumResults++] = Chain;
1279 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1280 ResultVals, NumResults);
1281 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the SPU return-value calling convention, copy each
// returned value into its assigned physical register (glued together via
// Flag), and terminate with an SPUISD::RET_FLAG node.
1285 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1286 SmallVector<CCValAssign, 16> RVLocs;
1287 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1288 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1289 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1290 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1292 // If this is the first return lowered for this function, add the regs to the
1293 // liveout set for the function.
1294 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1295 for (unsigned i = 0; i != RVLocs.size(); ++i)
1296 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1299 SDOperand Chain = Op.getOperand(0);
1302 // Copy the result values into the output registers.
1303 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1304 CCValAssign &VA = RVLocs[i];
1305 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands come in (value, signness-flag) pairs, hence i*2+1.
1306 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1307 Flag = Chain.getValue(1);
// With results, glue the last register copy to the return.
1311 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1313 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1317 //===----------------------------------------------------------------------===//
1318 // Vector related lowering:
1319 //===----------------------------------------------------------------------===//
// If N is a BUILD_VECTOR whose non-undef elements are all the same
// ConstantSDNode, return that constant; otherwise return null (also null
// for an all-undef vector, which should become an implicit def instead).
1321 static ConstantSDNode *
1322 getVecImm(SDNode *N) {
1323 SDOperand OpVal(0, 0);
1325 // Check to see if this buildvec has a single non-undef value in its elements.
1326 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1327 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1329 OpVal = N->getOperand(i);
// Two distinct non-undef elements: not a splat.
1330 else if (OpVal != N->getOperand(i))
1334 if (OpVal.Val != 0) {
1335 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1340 return 0; // All UNDEF: use implicit def.; not Constant node
1343 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1344 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// Unsigned 18-bit range check: 0 <= Value <= 0x3ffff.
1346 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1347 MVT::ValueType ValueType) {
1348 if (ConstantSDNode *CN = getVecImm(N)) {
1349 uint64_t Value = CN->getValue();
1350 if (Value <= 0x3ffff)
1351 return DAG.getConstant(Value, ValueType);
1357 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1358 /// and the value fits into a signed 16-bit constant, and if so, return the
// For each element type, the splat value passes iff sign-extending its low
// 16 bits reproduces the original value.
1360 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1361 MVT::ValueType ValueType) {
1362 if (ConstantSDNode *CN = getVecImm(N)) {
1363 if (ValueType == MVT::i32) {
1364 int Value = (int) CN->getValue();
1365 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1367 if (Value == SExtValue)
1368 return DAG.getConstant(Value, ValueType);
1369 } else if (ValueType == MVT::i16) {
1370 short Value = (short) CN->getValue();
1371 int SExtValue = ((int) Value << 16) >> 16;
1373 if (Value == (short) SExtValue)
1374 return DAG.getConstant(Value, ValueType);
1375 } else if (ValueType == MVT::i64) {
1376 int64_t Value = CN->getValue();
1377 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1379 if (Value == SExtValue)
1380 return DAG.getConstant(Value, ValueType);
1387 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1388 /// and the value fits into a signed 10-bit constant, and if so, return the
// Delegates the signed 10-bit range check to isS10Constant, truncating to
// short first for i16 elements.
1390 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1391 MVT::ValueType ValueType) {
1392 if (ConstantSDNode *CN = getVecImm(N)) {
1393 int Value = (int) CN->getValue();
1394 if ((ValueType == MVT::i32 && isS10Constant(Value))
1395 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1396 return DAG.getConstant(Value, ValueType);
1402 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1403 /// and the value fits into a signed 8-bit constant, and if so, return the
1406 /// @note: The incoming vector is v16i8 because that's the only way we can load
1407 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1409 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1410 MVT::ValueType ValueType) {
1411 if (ConstantSDNode *CN = getVecImm(N)) {
1412 int Value = (int) CN->getValue();
// i16 case: accept only when the high byte equals the low byte (a byte
// splat viewed as a halfword), then return just the low byte.
1413 if (ValueType == MVT::i16
1414 && Value <= 0xffff /* truncated from uint64_t */
1415 && ((short) Value >> 8) == ((short) Value & 0xff))
1416 return DAG.getConstant(Value & 0xff, ValueType);
1417 else if (ValueType == MVT::i8
1418 && (Value & 0xff) == Value)
1419 return DAG.getConstant(Value, ValueType);
1425 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1426 /// and the value fits into a signed 16-bit constant, and if so, return the
// ILHU loads a 16-bit immediate into the upper halfword of each word; the
// splat qualifies when only bits [31:16] of the value are set.
1428 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1429 MVT::ValueType ValueType) {
1430 if (ConstantSDNode *CN = getVecImm(N)) {
1431 uint64_t Value = CN->getValue();
// NOTE(review): for i64 the mask 0xffff0000 only admits values whose upper
// 32 bits are zero — confirm this restriction is intended for v2i64.
1432 if ((ValueType == MVT::i32
1433 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1434 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1435 return DAG.getConstant(Value >> 16, ValueType);
1441 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1442 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1443 if (ConstantSDNode *CN = getVecImm(N)) {
// Any splat value qualifies; truncate to the 32-bit element width.
1444 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1450 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1451 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1452 if (ConstantSDNode *CN = getVecImm(N)) {
// NOTE(review): the (unsigned) cast truncates a 64-bit splat value to its
// low 32 bits before building the i64 constant — looks like a bug unless
// callers guarantee the value fits in 32 bits; confirm.
1453 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1459 // If this is a vector of constants or undefs, get the bits. A bit in
1460 // UndefBits is set if the corresponding element of the vector is an
1461 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1462 // zero. Return true if this is not an array of constants, false if it is.
1464 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1465 uint64_t UndefBits[2]) {
1466 // Start with zero'd results.
1467 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
// All elements of a BUILD_VECTOR have the same type, so element 0 suffices.
1469 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1470 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1471 SDOperand OpVal = BV->getOperand(i);
1473 unsigned PartNo = i >= e/2; // In the upper 64-bit half of the vector?
1474 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1476 uint64_t EltBits = 0;
1477 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit of an undef element in UndefBits; VectorBits stays 0.
1478 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1479 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1481 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1482 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1483 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw IEEE-754 bit patterns.
1484 const APFloat &apf = CN->getValueAPF();
1485 EltBits = (CN->getValueType(0) == MVT::f32
1486 ? FloatToBits(apf.convertToFloat())
1487 : DoubleToBits(apf.convertToDouble()));
1489 // Nonconstant element.
1493 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1496 //printf("%llx %llx %llx %llx\n",
1497 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1501 /// If this is a splat (repetition) of a value across the whole vector, return
1502 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1503 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1504 /// SplatSize = 1 byte.
// At each level, Bits* ORs the two halves together (undef halves contribute
// zero) while Undef* ANDs them (a bit is undef only if undef in BOTH
// halves); the masked comparisons then ignore undef positions.
1505 static bool isConstantSplat(const uint64_t Bits128[2],
1506 const uint64_t Undef128[2],
1508 uint64_t &SplatBits, uint64_t &SplatUndef,
1510 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1511 // the same as the lower 64-bits, ignoring undefs.
1512 uint64_t Bits64 = Bits128[0] | Bits128[1];
1513 uint64_t Undef64 = Undef128[0] & Undef128[1];
1514 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1515 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1516 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1517 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1519 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1520 if (MinSplatBits < 64) {
1522 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1524 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1525 if (MinSplatBits < 32) {
1527 // If the top 16-bits are the same as the lower 16-bits, ignoring
1528 // undefs, try to narrow further; otherwise this is an i32 splat.
1529 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1530 if (MinSplatBits < 16) {
1531 // If the top 8-bits are the same as the lower 8-bits, ignoring
1532 // undefs, we have an 8-bit splat.
1533 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1534 // Otherwise, we have an 8-bit splat.
1535 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1536 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1542 SplatUndef = Undef16;
1549 SplatUndef = Undef32;
// 64-bit splat: both halves match, report the lower half verbatim.
1555 SplatBits = Bits128[0];
1556 SplatUndef = Undef128[0];
1562 return false; // Can't be a splat if two pieces don't match.
1565 // If this is a case we can't handle, return null and let the default
1566 // expansion code take care of it. If we CAN select this case, and if it
1567 // selects to a single instruction, return Op. Otherwise, if we can codegen
1568 // this case more efficiently than a constant pool load, lower it to the
1569 // sequence of ops that should be used.
1570 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1571 MVT::ValueType VT = Op.getValueType();
1572 // If this is a vector of constants or undefs, get the bits. A bit in
1573 // UndefBits is set if the corresponding element of the vector is an
1574 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1576 uint64_t VectorBits[2];
1577 uint64_t UndefBits[2];
1578 uint64_t SplatBits, SplatUndef;
1580 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1581 || !isConstantSplat(VectorBits, UndefBits,
1582 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1583 SplatBits, SplatUndef, SplatSize))
1584 return SDOperand(); // Not a constant vector, not a splat.
// f32 splat: build the equivalent v4i32 integer splat and bitcast it.
1589 uint32_t Value32 = SplatBits;
1590 assert(SplatSize == 4
1591 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1592 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1593 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1594 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1595 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// f64 splat: same trick through v2i64.
1599 uint64_t f64val = SplatBits;
1600 assert(SplatSize == 8
1601 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1602 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1603 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1604 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1605 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1609 // 8-bit constants have to be expanded to 16-bits
1610 unsigned short Value16 = SplatBits | (SplatBits << 8);
1612 for (int i = 0; i < 8; ++i)
1613 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1614 return DAG.getNode(ISD::BIT_CONVERT, VT,
1615 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// i16 splat: a 1-byte splat is widened by duplicating the byte.
1618 unsigned short Value16;
1620 Value16 = (unsigned short) (SplatBits & 0xffff);
1622 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1623 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1625 for (int i = 0; i < 8; ++i) Ops[i] = T;
1626 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1629 unsigned int Value = SplatBits;
1630 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1631 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 splat: split the value into 32-bit halves and synthesize it from
// two v4i32 splats combined with a SHUFB byte-shuffle.
1634 uint64_t val = SplatBits;
1635 uint32_t upper = uint32_t(val >> 32);
1636 uint32_t lower = uint32_t(val);
1641 SmallVector<SDOperand, 16> ShufBytes;
1643 bool upper_special, lower_special;
1645 // NOTE: This code creates common-case shuffle masks that can be easily
1646 // detected as common expressions. It is not attempting to create highly
1647 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1649 // Detect if the upper or lower half is a special shuffle mask pattern:
1650 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1651 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1653 // Create lower vector if not a special pattern
1654 if (!lower_special) {
1655 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1656 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1657 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1658 LO32C, LO32C, LO32C, LO32C));
1661 // Create upper vector if not a special pattern
1662 if (!upper_special) {
1663 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1664 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1665 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1666 HI32C, HI32C, HI32C, HI32C));
1669 // If either upper or lower are special, then the two input operands are
1670 // the same (basically, one of them is a "don't care")
1675 if (lower_special && upper_special) {
1676 // Unhappy situation... both upper and lower are special, so punt with
1677 // a target constant:
1678 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1679 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control mask; special halves are encoded with
// the shufb special byte codes (0x80 = 0x00, 0xc0 = 0xff, 0xe0 = 0x80).
1683 for (int i = 0; i < 4; ++i) {
1684 for (int j = 0; j < 4; ++j) {
1686 bool process_upper, process_lower;
1689 process_upper = (upper_special && (i & 1) == 0);
1690 process_lower = (lower_special && (i & 1) == 1);
1692 if (process_upper || process_lower) {
1693 if ((process_upper && upper == 0)
1694 || (process_lower && lower == 0))
1696 else if ((process_upper && upper == 0xffffffff)
1697 || (process_lower && lower == 0xffffffff))
1699 else if ((process_upper && upper == 0x80000000)
1700 || (process_lower && lower == 0x80000000))
1701 val = (j == 0 ? 0xe0 : 0x80);
1703 val = i * 4 + j + ((i & 1) * 16);
1705 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1709 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1710 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1711 &ShufBytes[0], ShufBytes.size()));
1713 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1714 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1715 return DAG.getNode(ISD::BIT_CONVERT, VT,
1716 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1717 Zero, Zero, Zero, Zero));
1725 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1726 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1727 /// permutation vector, V3, is monotonically increasing with one "exception"
1728 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1729 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1730 /// In either case, the net result is going to eventually invoke SHUFB to
1731 /// permute/shuffle the bytes from V1 and V2.
1733 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1734 /// control word for byte/halfword/word insertion. This takes care of a single
1735 /// element move from V2 into V1.
1737 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1738 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1739 SDOperand V1 = Op.getOperand(0);
1740 SDOperand V2 = Op.getOperand(1);
1741 SDOperand PermMask = Op.getOperand(2);
// Shuffling with an undef second operand degenerates to shuffling V1.
1743 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1745 // If we have a single element being moved from V1 to V2, this can be handled
1746 // using the C*[DX] compute mask instructions, but the vector elements have
1747 // to be monotonically increasing with one exception element.
1748 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1749 unsigned EltsFromV2 = 0;
1751 unsigned V2EltIdx0 = 0;
1752 unsigned CurrElt = 0;
1753 bool monotonic = true;
// V2EltIdx0 = index of the first element that refers to V2 (elided here:
// presumably 16/8/4 for i8/i16/i32 element types).
1754 if (EltVT == MVT::i8)
1756 else if (EltVT == MVT::i16)
1758 else if (EltVT == MVT::i32)
1761 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; bail out of the loop as soon as it is non-monotonic or
// pulls more than one element from V2.
1763 for (unsigned i = 0, e = PermMask.getNumOperands();
1764 EltsFromV2 <= 1 && monotonic && i != e;
1767 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1770 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1772 if (SrcElt >= V2EltIdx0) {
1774 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1775 } else if (CurrElt != SrcElt) {
1782 if (EltsFromV2 == 1 && monotonic) {
1783 // Compute mask and shuffle
1784 MachineFunction &MF = DAG.getMachineFunction();
1785 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1786 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1787 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1788 // Initialize temporary register to 0
1789 SDOperand InitTempReg =
1790 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1791 // Copy register's contents as index in INSERT_MASK:
1792 SDOperand ShufMaskOp =
1793 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1794 DAG.getTargetConstant(V2Elt, MVT::i32),
1795 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1796 // Use shuffle mask in SHUFB synthetic instruction:
// Note the (V2, V1) operand order here, versus (V1, V2) in the general
// byte-mask path below.
1797 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1799 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1800 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1802 SmallVector<SDOperand, 16> ResultMask;
1803 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1805 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1808 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1810 for (unsigned j = 0; j != BytesPerElement; ++j) {
1811 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1816 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1817 &ResultMask[0], ResultMask.size());
1818 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of
// n copies of the constant (later folded into a vector load); anything else
// becomes an SPUISD::PROMOTE_SCALAR register-to-register move.
1822 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1823 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1825 if (Op0.Val->getOpcode() == ISD::Constant) {
1826 // For a constant, build the appropriate constant vector, which will
1827 // eventually simplify to a vector register load.
1829 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1830 SmallVector<SDOperand, 16> ConstVecValues;
1834 // Create a constant vector:
1835 switch (Op.getValueType()) {
1836 default: assert(0 && "Unexpected constant value type in "
1837 "LowerSCALAR_TO_VECTOR");
1838 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1839 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1840 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1841 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1842 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1843 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1846 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1847 for (size_t j = 0; j < n_copies; ++j)
1848 ConstVecValues.push_back(CValue);
1850 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1851 &ConstVecValues[0], ConstVecValues.size());
1853 // Otherwise, copy the value from one register to another:
1854 switch (Op0.getValueType()) {
1855 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// PROMOTE_SCALAR takes the scalar twice (both operands are Op0).
1862 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Custom-lower an ISD::MUL on a vector type.
/*!
  SPU has no full-width vector multiply, so each element type is decomposed
  into the partial multiplies the hardware does provide (MPYH/MPYU/MPYHH/MPY),
  then reassembled with adds, shifts and SELB selects.
 */
1869 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1870 switch (Op.getValueType()) {
// v4i32 case: 32-bit product from 16-bit partial products:
//   mpyh(a,b) + mpyh(b,a) gives the cross terms shifted into the high
//   halves, mpyu(a,b) gives the low 16x16 unsigned product.
1872 SDOperand rA = Op.getOperand(0);
1873 SDOperand rB = Op.getOperand(1);
1874 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1875 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1876 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1877 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1879 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1883 // Multiply two v8i16 vectors (pipeline friendly version):
1884 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1885 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1886 // c) Use SELB to select upper and lower halves from the intermediate results
1888 // NOTE: We really want to move the FSMBI to earlier to actually get the
1889 // dual-issue. This code does manage to do this, even if it's a little on
1892 MachineFunction &MF = DAG.getMachineFunction();
1893 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): Chain and rA both read operand 0 — an ISD::MUL node has no
// chain operand, so using operand 0 as a chain looks suspect; verify intent.
1894 SDOperand Chain = Op.getOperand(0);
1895 SDOperand rA = Op.getOperand(0);
1896 SDOperand rB = Op.getOperand(1);
// Virtual registers pin the FSMBI mask and the high product so the copies
// can be scheduled early (helping dual-issue, per the NOTE above).
1897 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1898 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// FSMBI 0xcccc expands the bit pattern into a byte select mask (every other
// halfword) used by the SELB below.
1901 DAG.getCopyToReg(Chain, FSMBIreg,
1902 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1903 DAG.getConstant(0xcccc, MVT::i32)));
1906 DAG.getCopyToReg(FSMBOp, HiProdReg,
1907 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1909 SDOperand HHProd_v4i32 =
1910 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1911 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// Merge: low halves from MPY(rA,rB), high halves from the shifted MPYHH
// product, selected by the FSMBI mask.
1913 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1914 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1915 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1916 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1918 DAG.getConstant(16, MVT::i16))),
1919 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1922 // This M00sE is N@stI! (apologies to Monty Python)
1924 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1925 // is to break it all apart, sign extend, and reassemble the various
1926 // intermediate products.
1928 MachineFunction &MF = DAG.getMachineFunction();
1929 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): same Chain/rA operand-0 aliasing as the v8i16 case — verify.
1930 SDOperand Chain = Op.getOperand(0);
1931 SDOperand rA = Op.getOperand(0);
1932 SDOperand rB = Op.getOperand(1);
1933 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1934 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1936 unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1937 unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1938 unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// Low-low product of the even bytes, computed as 16-bit multiplies.
1941 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1942 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1943 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Sign-extend the high byte of each halfword down into place (arithmetic
// shift right by 8), then multiply and shift the product back up.
1945 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1947 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1950 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1951 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// FSMBI 0x2222 selects one byte out of every four for the SELB merges.
1953 SDOperand FSMBdef_2222 =
1954 DAG.getCopyToReg(Chain, FSMBreg_2222,
1955 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1956 DAG.getConstant(0x2222, MVT::i32)));
1958 SDOperand FSMBuse_2222 =
1959 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1961 SDOperand LoProd_1 =
1962 DAG.getCopyToReg(Chain, LoProd_reg,
1963 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
// Mask the merged low products down to the low 16 bits of each word.
1966 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1969 DAG.getNode(ISD::AND, MVT::v4i32,
1970 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1971 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1972 LoProdMask, LoProdMask,
1973 LoProdMask, LoProdMask));
// Now the analogous dance for the upper halfword of each 32-bit lane:
// shift the operands down 16 and form the two high partial products.
1976 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1980 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1981 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1984 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1985 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1986 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1988 SDOperand HHProd_1 =
1989 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1990 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1993 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
// Merge the high partial products and position them in the upper halfword.
1996 DAG.getCopyToReg(Chain, HiProd_reg,
1997 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1999 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2003 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2004 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
// Final result: OR the low and high byte products back together as v16i8.
2006 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2007 DAG.getNode(ISD::OR, MVT::v4i32,
// Unsupported vector type: report and fall through to abort.
2012 cerr << "CellSPU: Unknown vector multiplication, got "
2013 << MVT::getValueTypeString(Op.getValueType())
//! Custom-lower f32 (and v4f32) FDIV.
/*!
  SPU has no divide instruction; A/B is computed as a reciprocal estimate
  of B (FPRecipEst), refined once with FPInterp, multiplied by A, and then
  corrected with one Newton-Raphson-style residual step:
      result = A*Brcp + Brcp*(A - B*(A*Brcp))
 */
2022 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2023 MachineFunction &MF = DAG.getMachineFunction();
2024 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2026 SDOperand A = Op.getOperand(0);
2027 SDOperand B = Op.getOperand(1);
2028 unsigned VT = Op.getValueType();
2030 unsigned VRegBR, VRegC;
// Scalar f32 uses the FP register class; the vector form (v4f32) uses
// the full vector register class.
2032 if (VT == MVT::f32) {
2033 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2034 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2036 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2037 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2039 // TODO: make sure we're feeding FPInterp the right arguments
2040 // Right now: fi B, frest(B)
2043 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl holds the refined reciprocal of B, pinned in VRegBR.
2045 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2046 DAG.getNode(SPUISD::FPInterp, VT, B,
2047 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2049 // Computes A * BRcpl and stores in a temporary register
2051 DAG.getCopyToReg(BRcpl, VRegC,
2052 DAG.getNode(ISD::FMUL, VT, A,
2053 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2054 // What's the Chain variable do? It's magic!
2055 // TODO: set Chain = Op(0).getEntryNode()
// One correction step: add BRcpl * (A - B * (A*BRcpl)) to the first
// approximation A*BRcpl.
2057 return DAG.getNode(ISD::FADD, VT,
2058 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2059 DAG.getNode(ISD::FMUL, VT,
2060 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2061 DAG.getNode(ISD::FSUB, VT, A,
2062 DAG.getNode(ISD::FMUL, VT, B,
2063 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Custom-lower EXTRACT_VECTOR_ELT (constant index only).
/*!
  SPU reads a scalar from the "preferred slot" of a vector register, so any
  other element must first be shuffled into that slot.  Element 0 of i32/i64
  already occupies the preferred slot and extracts directly.
 */
2066 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2067 unsigned VT = Op.getValueType();
2068 SDOperand N = Op.getOperand(0);
2069 SDOperand Elt = Op.getOperand(1);
2070 SDOperand ShufMask[16];
2071 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2073 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2075 int EltNo = (int) C->getValue();
// Bounds-check the element index per element type.
// NOTE(review): the i32/i64 assert strings say "> 4"/"> 2" but the checks
// fire at >= 4 / >= 2 — messages are off by one; the conditions are right.
2078 if (VT == MVT::i8 && EltNo >= 16)
2079 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2080 else if (VT == MVT::i16 && EltNo >= 8)
2081 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2082 else if (VT == MVT::i32 && EltNo >= 4)
2083 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2084 else if (VT == MVT::i64 && EltNo >= 2)
2085 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2087 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2088 // i32 and i64: Element 0 is the preferred slot
2089 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2092 // Need to generate shuffle mask and extract:
2093 int prefslot_begin = -1, prefslot_end = -1;
2094 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte range depends on the scalar width (per-VT switch;
// some cases elided here): i8 -> byte 3, i16 -> 2..3, i32 -> 0..3, i64 -> 0..7.
2098 prefslot_begin = prefslot_end = 3;
2102 prefslot_begin = 2; prefslot_end = 3;
2106 prefslot_begin = 0; prefslot_end = 3;
2110 prefslot_begin = 0; prefslot_end = 7;
2115 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2116 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shuffle mask that routes the requested element's bytes
// into the preferred slot; bytes past the slot just repeat the pattern.
2118 for (int i = 0; i < 16; ++i) {
2119 // zero fill uppper part of preferred slot, don't care about the
2121 unsigned int mask_val;
2123 if (i <= prefslot_end) {
2125 ((i < prefslot_begin)
2127 : elt_byte + (i - prefslot_begin));
2129 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2131 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2134 SDOperand ShufMaskVec =
2135 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2137 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle the element into the preferred slot, then extract it.
2139 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2140 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2141 N, N, ShufMaskVec));
//! Custom-lower INSERT_VECTOR_ELT (constant index only).
/*!
  Implemented the SPU way: splat the scalar (SCALAR_TO_VECTOR), generate an
  insertion control mask (INSERT_MASK / "cwd"-style) addressed off a 16-byte
  aligned register, and SHUFB the new element into the target vector.
 */
2145 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2146 SDOperand VecOp = Op.getOperand(0);
2147 SDOperand ValOp = Op.getOperand(1);
2148 SDOperand IdxOp = Op.getOperand(2);
2149 MVT::ValueType VT = Op.getValueType();
// cast<> (not dyn_cast) — a non-constant index would already have asserted.
2151 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2152 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2154 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2155 // Use $2 because it's always 16-byte aligned and it's available:
2156 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2159 DAG.getNode(SPUISD::SHUFB, VT,
2160 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
// Mask address = $2 + element index offset; INSERT_MASK produces the
// byte-select pattern for that slot.
2162 DAG.getNode(SPUISD::INSERT_MASK, VT,
2163 DAG.getNode(ISD::ADD, PtrVT,
2165 DAG.getConstant(CN->getValue(),
//! Custom-lower i8 arithmetic: promote to i16, operate, truncate back.
/*!
  SPU has no 8-bit ALU operations, so every supported i8 operator (the case
  labels are elided here; sub/rotate/shift/mul variants are visible below)
  widens its operands to i16 — sign- or zero-extending as the operator
  requires — performs the i16 operation \p Opc, and truncates to i8.
  Constant operands are re-materialized directly as i16 constants.
 */
2171 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2172 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2174 assert(Op.getValueType() == MVT::i8);
2177 assert(0 && "Unhandled i8 math operator");
2181 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2183 SDOperand N1 = Op.getOperand(1);
2184 N0 = (N0.getOpcode() != ISD::Constant
2185 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2186 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2187 N1 = (N1.getOpcode() != ISD::Constant
2188 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2189 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2190 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2191 DAG.getNode(Opc, MVT::i16, N0, N1));
// Rotate case (presumably ROTL — case label elided): the 8-bit value is
// replicated into both halves of the i16 (x | (x << 8)) so a 16-bit rotate
// behaves like an 8-bit rotate in the low byte.
2195 SDOperand N1 = Op.getOperand(1);
2197 N0 = (N0.getOpcode() != ISD::Constant
2198 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2199 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2200 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2201 N1 = (N1.getOpcode() != ISD::Constant
2202 ? DAG.getNode(N1Opc, MVT::i16, N1)
2203 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2204 SDOperand ExpandArg =
2205 DAG.getNode(ISD::OR, MVT::i16, N0,
2206 DAG.getNode(ISD::SHL, MVT::i16,
2207 N0, DAG.getConstant(8, MVT::i16)));
2208 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2209 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical shift case: zero-extend value, adjust shift amount to i16.
2213 SDOperand N1 = Op.getOperand(1);
2215 N0 = (N0.getOpcode() != ISD::Constant
2216 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2217 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2218 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2219 N1 = (N1.getOpcode() != ISD::Constant
2220 ? DAG.getNode(N1Opc, MVT::i16, N1)
2221 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2222 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2223 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic shift case: sign-extend the value so the sign bit propagates.
2226 SDOperand N1 = Op.getOperand(1);
2228 N0 = (N0.getOpcode() != ISD::Constant
2229 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2230 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2231 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2232 N1 = (N1.getOpcode() != ISD::Constant
2233 ? DAG.getNode(N1Opc, MVT::i16, N1)
2234 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2235 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2236 DAG.getNode(Opc, MVT::i16, N0, N1));
// Multiply case: sign-extend both operands; the low 8 bits of the i16
// product equal the i8 product regardless of signedness.
2239 SDOperand N1 = Op.getOperand(1);
2241 N0 = (N0.getOpcode() != ISD::Constant
2242 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2243 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2244 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2245 N1 = (N1.getOpcode() != ISD::Constant
2246 ? DAG.getNode(N1Opc, MVT::i16, N1)
2247 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2248 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2249 DAG.getNode(Opc, MVT::i16, N0, N1));
2257 //! Lower byte immediate operations for v16i8 vectors:
/*!
  Recognizes (op vector, splat-constant) — in either operand order, possibly
  behind a BIT_CONVERT — and rewrites the splatted BUILD_VECTOR as sixteen
  i8 TargetConstants so the instruction selector can match the SPU
  byte-immediate forms (ANDBI/ORBI/XORBI).  If no constant splat is found,
  the operation is left for the code elided below this block to handle.
 */
2259 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2262 MVT::ValueType VT = Op.getValueType();
// First guess: constant vector is operand 0, value is operand 1.
2264 ConstVec = Op.getOperand(0);
2265 Arg = Op.getOperand(1);
2266 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
// Peel a BIT_CONVERT, or swap the operands and try the other side.
2267 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2268 ConstVec = ConstVec.getOperand(0);
2270 ConstVec = Op.getOperand(1);
2271 Arg = Op.getOperand(0);
2272 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2273 ConstVec = ConstVec.getOperand(0);
2278 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2279 uint64_t VectorBits[2];
2280 uint64_t UndefBits[2];
2281 uint64_t SplatBits, SplatUndef;
// Only rewrite when the BUILD_VECTOR is a constant splat.
2284 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2285 && isConstantSplat(VectorBits, UndefBits,
2286 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2287 SplatBits, SplatUndef, SplatSize)) {
2288 SDOperand tcVec[16];
2289 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2290 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2292 // Turn the BUILD_VECTOR into a set of target constants:
2293 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same opcode with the splat as target constants.
2296 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2297 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2304 //! Lower i32 multiplication
/*!
  SPU only multiplies 16x16 -> 32, so a full 32-bit product is assembled
  from partial products:  mpyh(a,b) + mpyh(b,a) + mpyu(a,b)
  (the two cross high-half terms plus the unsigned low-half product).
 */
2305 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// Unsupported type: report and fall through to the error path (elided).
2309 cerr << "CellSPU: Unknown LowerMUL value type, got "
2310 << MVT::getValueTypeString(Op.getValueType())
2316 SDOperand rA = Op.getOperand(0);
2317 SDOperand rB = Op.getOperand(1);
2319 return DAG.getNode(ISD::ADD, MVT::i32,
2320 DAG.getNode(ISD::ADD, MVT::i32,
2321 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2322 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA),
2323 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2330 //! Custom lowering for CTPOP (count population)
2332 Custom lowering code that counts the number ones in the input
2333 operand. SPU has such an instruction, but it counts the number of
2334 ones per byte, which then have to be accumulated.
// Strategy per width: promote the scalar into a vector, run CNTB (per-byte
// popcount), extract, then fold the per-byte counts together with
// shift-and-add; the widths differ only in how many folds are needed.
2336 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2337 unsigned VT = Op.getValueType();
// vecVT = a full 128-bit vector of the scalar type (e.g. i32 -> v4i32).
2338 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8 case: CNTB's per-byte count IS the answer; just extract element 0.
2342 SDOperand N = Op.getOperand(0);
2343 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2345 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2346 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2348 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 case: two byte-counts; add high byte into low ((x >> 8) + x) & 0x0f.
2352 MachineFunction &MF = DAG.getMachineFunction();
2353 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2355 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2357 SDOperand N = Op.getOperand(0);
2358 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2359 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2360 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2362 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2363 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2365 // CNTB_result becomes the chain to which all of the virtual registers
2366 // CNTB_reg, SUM1_reg become associated:
2367 SDOperand CNTB_result =
2368 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2370 SDOperand CNTB_rescopy =
2371 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2373 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2375 return DAG.getNode(ISD::AND, MVT::i16,
2376 DAG.getNode(ISD::ADD, MVT::i16,
2377 DAG.getNode(ISD::SRL, MVT::i16,
// i32 case: four byte-counts; two rounds of shift-and-add (by 16, then 8),
// then mask to the final 0..32 value.
2384 MachineFunction &MF = DAG.getMachineFunction();
2385 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2387 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2388 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2390 SDOperand N = Op.getOperand(0);
2391 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2392 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2393 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2394 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2396 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2397 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2399 // CNTB_result becomes the chain to which all of the virtual registers
2400 // CNTB_reg, SUM1_reg become associated:
2401 SDOperand CNTB_result =
2402 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2404 SDOperand CNTB_rescopy =
2405 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// Fold halves: Sum1 = (cnt >> 16) + cnt.
2408 DAG.getNode(ISD::SRL, MVT::i32,
2409 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2412 DAG.getNode(ISD::ADD, MVT::i32,
2413 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2415 SDOperand Sum1_rescopy =
2416 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Fold bytes: Sum2 = (Sum1 >> 8) + Sum1; low byte now holds the total.
2419 DAG.getNode(ISD::SRL, MVT::i32,
2420 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2423 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2424 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2426 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2436 /// LowerOperation - Provide custom lowering hooks for some operations.
///
/// Central dispatcher for every node the SPU target marked "Custom" in its
/// constructor: each case forwards to the matching Lower* helper above.
/// The default case (labels elided) reports the unhandled opcode and aborts.
2439 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2441 switch (Op.getOpcode()) {
2443 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2444 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2445 cerr << "*Op.Val:\n";
// Memory operations: need subtarget info for addressing-mode decisions.
2452 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2454 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2455 case ISD::ConstantPool:
2456 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2457 case ISD::GlobalAddress:
2458 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2459 case ISD::JumpTable:
2460 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2462 return LowerConstant(Op, DAG);
2463 case ISD::ConstantFP:
2464 return LowerConstantFP(Op, DAG);
2466 return LowerBRCOND(Op, DAG);
2467 case ISD::FORMAL_ARGUMENTS:
2468 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2470 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2472 return LowerRET(Op, DAG, getTargetMachine());
// i8 arithmetic (case labels elided): widen-to-i16 lowering.
2481 return LowerI8Math(Op, DAG, Op.getOpcode());
2483 // Vector-related lowering.
2484 case ISD::BUILD_VECTOR:
2485 return LowerBUILD_VECTOR(Op, DAG);
2486 case ISD::SCALAR_TO_VECTOR:
2487 return LowerSCALAR_TO_VECTOR(Op, DAG);
2488 case ISD::VECTOR_SHUFFLE:
2489 return LowerVECTOR_SHUFFLE(Op, DAG);
2490 case ISD::EXTRACT_VECTOR_ELT:
2491 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2492 case ISD::INSERT_VECTOR_ELT:
2493 return LowerINSERT_VECTOR_ELT(Op, DAG);
2495 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2499 return LowerByteImmed(Op, DAG);
2501 // Vector and i8 multiply:
2503 if (MVT::isVector(Op.getValueType()))
2504 return LowerVectorMUL(Op, DAG);
2505 else if (Op.getValueType() == MVT::i8)
2506 return LowerI8Math(Op, DAG, Op.getOpcode());
2508 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only f32/v4f32 supported; f64 lowering is still TODO.
2511 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2512 return LowerFDIVf32(Op, DAG);
2513 // else if (Op.getValueType() == MVT::f64)
2514 // return LowerFDIVf64(Op, DAG);
2516 assert(0 && "Calling FDIV on unsupported MVT");
2519 return LowerCTPOP(Op, DAG);
2525 //===----------------------------------------------------------------------===//
2526 // Target Optimization Hooks
2527 //===----------------------------------------------------------------------===//
/// PerformDAGCombine - Target-specific DAG combines.
///
/// Two folds are visible here:
///   1. (SPUindirect (SPUaform addr, 0), 0) -> (SPUaform addr, 0)
///      when not using large-memory addressing.
///   2. (add const, (SPUindirect arg, const)) and its mirror image fold the
///      two constants into a single SPUindirect offset.
2530 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2533 TargetMachine &TM = getTargetMachine();
2535 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2536 SelectionDAG &DAG = DCI.DAG;
2537 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2539 switch (N->getOpcode()) {
2541 case SPUISD::IndirectAddr: {
2542 if (!ST->usingLargeMem() && N0.getOpcode() == SPUISD::AFormAddr) {
2543 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2544 if (CN->getValue() == 0) {
2545 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2546 // (SPUaform <addr>, 0)
2548 DEBUG(cerr << "Replace: ");
2549 DEBUG(N->dump(&DAG));
2550 DEBUG(cerr << "\nWith: ");
2551 DEBUG(N0.Val->dump(&DAG));
2552 DEBUG(cerr << "\n");
// ISD::ADD combine (case label elided): fold constants into IndirectAddr.
2559 SDOperand Op0 = N->getOperand(0);
2560 SDOperand Op1 = N->getOperand(1);
2562 if ((Op1.getOpcode() == ISD::Constant
2563 || Op1.getOpcode() == ISD::TargetConstant)
2564 && Op0.getOpcode() == SPUISD::IndirectAddr) {
2565 SDOperand Op01 = Op0.getOperand(1);
2566 if (Op01.getOpcode() == ISD::Constant
2567 || Op01.getOpcode() == ISD::TargetConstant) {
2568 // (add <const>, (SPUindirect <arg>, <const>)) ->
2569 // (SPUindirect <arg>, <const + const>)
2570 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2571 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2572 SDOperand combinedConst =
2573 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2574 Op0.getValueType());
2576 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2577 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2578 DEBUG(cerr << "With: (SPUindirect <arg>, "
2579 << CN0->getValue() + CN1->getValue() << ")\n");
2580 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2581 Op0.getOperand(0), combinedConst);
// Mirror image: constant on the left, IndirectAddr on the right.
2583 } else if ((Op0.getOpcode() == ISD::Constant
2584 || Op0.getOpcode() == ISD::TargetConstant)
2585 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2586 SDOperand Op11 = Op1.getOperand(1);
2587 if (Op11.getOpcode() == ISD::Constant
2588 || Op11.getOpcode() == ISD::TargetConstant) {
2589 // (add (SPUindirect <arg>, <const>), <const>) ->
2590 // (SPUindirect <arg>, <const + const>)
2591 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2592 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2593 SDOperand combinedConst =
2594 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2595 Op0.getValueType());
// NOTE(review): these DEBUG strings mirror the first branch's shape even
// though the operand order is swapped here — cosmetic only; verify.
2597 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2598 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2599 DEBUG(cerr << "With: (SPUindirect <arg>, "
2600 << CN0->getValue() + CN1->getValue() << ")\n");
2602 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2603 Op1.getOperand(0), combinedConst);
2608 // Otherwise, return unchanged.
2612 //===----------------------------------------------------------------------===//
2613 // Inline Assembly Support
2614 //===----------------------------------------------------------------------===//
2616 /// getConstraintType - Given a constraint letter, return the type of
2617 /// constraint it is for this target.
///
/// The SPU-specific letters (cases elided here) classify as register-class
/// constraints; anything else defers to the TargetLowering default.
2618 SPUTargetLowering::ConstraintType
2619 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2620 if (ConstraintLetter.size() == 1) {
2621 switch (ConstraintLetter[0]) {
2628 return C_RegisterClass;
2631 return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm constraint plus a value type to the SPU
/// register class that satisfies it (case labels elided); unrecognized
/// constraints fall through to the TargetLowering default.
2634 std::pair<unsigned, const TargetRegisterClass*>
2635 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2636 MVT::ValueType VT) const
2638 if (Constraint.size() == 1) {
2639 // GCC RS6000 Constraint Letters
2640 switch (Constraint[0]) {
// Integer constraint: 64-bit values get R64C, otherwise R32C.
2644 return std::make_pair(0U, SPU::R64CRegisterClass);
2645 return std::make_pair(0U, SPU::R32CRegisterClass);
// Floating-point constraint: pick FP class by width.
2648 return std::make_pair(0U, SPU::R32FPRegisterClass);
2649 else if (VT == MVT::f64)
2650 return std::make_pair(0U, SPU::R64FPRegisterClass);
// General-purpose fallback class.
2653 return std::make_pair(0U, SPU::GPRCRegisterClass);
2657 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
/// computeMaskedBitsForTargetNode - Conservative implementation: claims no
/// known-zero or known-one bits for any SPU target node (parameter list
/// partially elided in this view).
2661 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2665 const SelectionDAG &DAG,
2666 unsigned Depth ) const {
// Nothing is known about any bit of the result.
2667 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2670 // LowerAsmOperandForConstraint
// No SPU-specific constraint-operand handling yet; delegate entirely to
// the generic TargetLowering implementation.
2672 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2673 char ConstraintLetter,
2674 std::vector<SDOperand> &Ops,
2675 SelectionDAG &DAG) {
2676 // Default, for the time being, to the base class handler
2677 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2680 /// isLegalAddressImmediate - Return true if the integer value can be used
2681 /// as the offset of the target addressing mode.
2682 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2683 // SPU's addresses are 256K:
// Accept signed 18-bit offsets, i.e. the open interval
// (-2^18, 2^18 - 1); both exact endpoints are excluded by the < / >.
2684 return (V > -(1 << 18) && V < (1 << 18) - 1);
2687 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {