1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// Lazily-populated SPUISD opcode -> printable-name table.
// NOTE(review): a mutable file-scope std::map has no synchronization; this
// assumes single-threaded DAG printing — confirm before relying on it.
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
// Per-type table giving each value type's "preferred slot" byte offset
// inside a 16-byte SPU register (table entries elided in this excerpt).
46 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map, computed from the array itself so the
// two can never drift apart.
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for VT's entry.  Returns a pointer into the
// table, or 0 when VT has no mapping (the cerr below reports that case;
// the remainder of the diagnostic/return path is elided in this excerpt).
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// Reached with retval == 0: print which type had no table entry.
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
// Used by the load/store lowering code to decide whether a base pointer is
// a direct (addressable) symbol rather than a computed address.  Both the
// generic and Target* forms of each address node are accepted, plus the
// SPU-specific A-form address node.
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
// True when the address is held in a register (ISD::Register) or is the
// result of a prior load (SPUISD::LDRESULT) — i.e. not a direct symbol.
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
// Constructor: registers the SPU register classes and declares, for every
// (operation, type) pair, whether SelectionDAG legalization should treat it
// as Legal, Custom (lowered by this class), Expand, or Promote.
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 // NOTE: i8 register class is not registered because we cannot determine when
126 // we need to zero or sign extend for custom-lowered loads and stores.
127 // NOTE: Ignore the previous note. For now. :-)
128 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
129 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
130 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
131 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
132 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
133 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
134 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
140 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
142 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
143 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
144 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
146 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
147 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
148 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
149 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
151 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
152 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
153 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
155 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
156 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
157 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
159 // SPU constant load actions are custom lowered:
160 setOperationAction(ISD::Constant, MVT::i64, Custom);
161 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
162 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
164 // SPU's loads and stores have to be custom lowered:
165 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
167 setOperationAction(ISD::LOAD, sctype, Custom);
168 setOperationAction(ISD::STORE, sctype, Custom);
171 // Custom lower BRCOND for i1, i8 to "promote" the result to
172 // i32 and i16, respectively.
173 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175 // Expand the jumptable branches
176 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
177 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
180 // SPU has no intrinsics for these particular operations:
181 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
182 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
183 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
// SPU has no SREM/UREM instructions (comment previously said "PowerPC" —
// a leftover from the PPC backend this file was derived from).
186 setOperationAction(ISD::SREM, MVT::i32, Expand);
187 setOperationAction(ISD::UREM, MVT::i32, Expand);
188 setOperationAction(ISD::SREM, MVT::i64, Expand);
189 setOperationAction(ISD::UREM, MVT::i64, Expand);
191 // We don't support sin/cos/sqrt/fmod
192 setOperationAction(ISD::FSIN , MVT::f64, Expand);
193 setOperationAction(ISD::FCOS , MVT::f64, Expand);
194 setOperationAction(ISD::FREM , MVT::f64, Expand);
195 setOperationAction(ISD::FSIN , MVT::f32, Expand);
196 setOperationAction(ISD::FCOS , MVT::f32, Expand);
197 setOperationAction(ISD::FREM , MVT::f32, Expand);
199 // If we're enabling GP optimizations, use hardware square root
200 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
201 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
203 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
206 // SPU can do rotate right and left, so legalize it... but customize for i8
207 // because instructions don't exist.
208 setOperationAction(ISD::ROTR, MVT::i32, Legal);
209 setOperationAction(ISD::ROTR, MVT::i16, Legal);
210 setOperationAction(ISD::ROTR, MVT::i8, Custom);
211 setOperationAction(ISD::ROTL, MVT::i32, Legal);
212 setOperationAction(ISD::ROTL, MVT::i16, Legal);
213 setOperationAction(ISD::ROTL, MVT::i8, Custom);
214 // SPU has no native version of shift left/right for i8
215 setOperationAction(ISD::SHL, MVT::i8, Custom);
216 setOperationAction(ISD::SRL, MVT::i8, Custom);
217 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // Custom lower i32 multiplications
220 setOperationAction(ISD::MUL, MVT::i32, Custom);
222 // Need to custom handle (some) common i8 math ops
223 setOperationAction(ISD::SUB, MVT::i8, Custom);
224 setOperationAction(ISD::MUL, MVT::i8, Custom);
226 // SPU does not have BSWAP. It does have i32 support CTLZ.
227 // CTPOP has to be custom lowered.
228 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
229 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
231 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
232 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
233 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
234 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
236 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
237 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
239 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
241 // SPU does not have select or setcc
242 setOperationAction(ISD::SELECT, MVT::i1, Expand);
243 setOperationAction(ISD::SELECT, MVT::i8, Expand);
244 setOperationAction(ISD::SELECT, MVT::i16, Expand);
245 setOperationAction(ISD::SELECT, MVT::i32, Expand);
246 setOperationAction(ISD::SELECT, MVT::i64, Expand);
247 setOperationAction(ISD::SELECT, MVT::f32, Expand);
248 setOperationAction(ISD::SELECT, MVT::f64, Expand);
250 setOperationAction(ISD::SETCC, MVT::i1, Expand);
251 setOperationAction(ISD::SETCC, MVT::i8, Expand);
252 setOperationAction(ISD::SETCC, MVT::i16, Expand);
253 setOperationAction(ISD::SETCC, MVT::i32, Expand);
254 setOperationAction(ISD::SETCC, MVT::i64, Expand);
255 setOperationAction(ISD::SETCC, MVT::f32, Expand);
256 setOperationAction(ISD::SETCC, MVT::f64, Expand);
258 // SPU has a legal FP -> signed INT instruction
259 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
260 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
261 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
262 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
264 // FDIV on SPU requires custom lowering
265 setOperationAction(ISD::FDIV, MVT::f32, Custom);
266 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
268 // SPU has [U|S]INT_TO_FP
269 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
270 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
271 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
272 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
273 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
274 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
275 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
276 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
278 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
279 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
280 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
281 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
283 // We cannot sextinreg(i1). Expand to shifts.
284 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
286 // Support label based line numbers.
287 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
288 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
290 // We want to legalize GlobalAddress and ConstantPool nodes into the
291 // appropriate instructions to materialize the address.
292 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
294 setOperationAction(ISD::GlobalAddress, sctype, Custom);
295 setOperationAction(ISD::ConstantPool, sctype, Custom);
296 setOperationAction(ISD::JumpTable, sctype, Custom);
299 // RET must be custom lowered, to meet ABI requirements
300 setOperationAction(ISD::RET, MVT::Other, Custom);
302 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
303 setOperationAction(ISD::VASTART , MVT::Other, Custom);
305 // Use the default implementation.
306 setOperationAction(ISD::VAARG , MVT::Other, Expand);
307 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
308 setOperationAction(ISD::VAEND , MVT::Other, Expand);
309 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
310 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
311 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
312 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
314 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): FP_TO_SINT/i64 and SINT_TO_FP/i64 were already set to
// Custom above — these two calls are redundant (harmless, last write wins).
315 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
316 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
318 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this deliberately overrides the earlier
// FP_TO_UINT/i32 = Legal setting; the final action is Promote.
319 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
321 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
322 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
324 // First set operation action for all vector types to expand. Then we
325 // will selectively turn on ones that can be effectively codegen'd.
326 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
327 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
328 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
329 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
330 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
331 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
333 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
334 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
335 // add/sub are legal for all supported vector VT's.
336 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
337 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
338 // mul has to be custom lowered.
339 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
341 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
342 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
343 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
344 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
345 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
346 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
348 // These operations need to be expanded:
349 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
350 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
351 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
352 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
353 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
355 // Custom lower build_vector, constant pool spills, insert and
356 // extract vector elements:
357 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
358 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
359 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
360 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
361 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
362 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8 overrides: byte-vector logic ops need custom handling on SPU.
365 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
366 setOperationAction(ISD::AND, MVT::v16i8, Custom);
367 setOperationAction(ISD::OR, MVT::v16i8, Custom);
368 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
369 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
371 setSetCCResultType(MVT::i32);
372 setShiftAmountType(MVT::i32);
373 setSetCCResultContents(ZeroOrOneSetCCResult);
375 setStackPointerRegisterToSaveRestore(SPU::R1);
377 // We have target-specific dag combine patterns for the following nodes:
378 setTargetDAGCombine(ISD::ADD);
380 computeRegisterProperties();
// Returns the printable name of a SPUISD target node opcode, or 0 if the
// opcode is unknown.  Fills the file-scope node_names map on first call
// (not thread-safe — see note at the map's definition).
384 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
386 if (node_names.empty()) {
387 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
388 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
389 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
390 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
391 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
392 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
393 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
394 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
395 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
396 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
397 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
398 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
399 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
400 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
401 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
402 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
403 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
404 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
405 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
406 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
407 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
408 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
409 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
410 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
411 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
412 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
413 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
414 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
415 "SPUISD::ROTBYTES_RIGHT_Z";
416 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
417 "SPUISD::ROTBYTES_RIGHT_S";
418 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
419 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
420 "SPUISD::ROTBYTES_LEFT_CHAINED";
421 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
422 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
423 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
424 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
425 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
426 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup: 0 is returned for opcodes with no registered name.
429 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
431 return ((i != node_names.end()) ? i->second : 0);
434 //===----------------------------------------------------------------------===//
435 // Calling convention code:
436 //===----------------------------------------------------------------------===//
438 #include "SPUGenCallingConv.inc"
440 //===----------------------------------------------------------------------===//
441 // LowerOperation implementation
442 //===----------------------------------------------------------------------===//
444 /// Aligned load common code for CellSPU
446 \param[in] Op The SelectionDAG load or store operand
447 \param[in] DAG The selection DAG
448 \param[in] ST CellSPU subtarget information structure
449 \param[in,out] alignment Caller initializes this to the load or store node's
450 value from getAlignment(), may be updated while generating the aligned load
451 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
452 offset (divisible by 16, modulo 16 == 0)
453 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
454 offset of the preferred slot (modulo 16 != 0)
455 \param[in,out] VT Caller initializes this value type to the load or store
456 node's loaded or stored value type; may be updated if an i1-extended load or
458 \param[out] was16aligned true if the base pointer had 16-byte alignment,
459 otherwise false. Can help to determine if the chunk needs to be rotated.
461 Both load and store lowering load a block of data aligned on a 16-byte
462 boundary. This is the common aligned load code shared between both.
465 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
467 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
468 MVT::ValueType &VT, bool &was16aligned)
470 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// NOTE(review): vtm is dereferenced below without a null check;
// getValueTypeMapEntry can return 0 for unmapped types — confirm all VTs
// reaching here have table entries.
471 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
472 SDOperand basePtr = LSN->getBasePtr();
473 SDOperand chain = LSN->getChain();
// base + constant: split the address into its 16-byte-aligned part and the
// residual offset within the chunk.
475 if (basePtr.getOpcode() == ISD::ADD) {
476 SDOperand Op1 = basePtr.Val->getOperand(1);
478 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
479 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
481 alignOffs = (int) CN->getValue();
482 prefSlotOffs = (int) (alignOffs & 0xf);
484 // Adjust the rotation amount to ensure that the final result ends up in
485 // the preferred slot:
486 prefSlotOffs -= vtm->prefslot_byte;
487 basePtr = basePtr.getOperand(0);
489 // Loading from memory, can we adjust alignment?
490 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
491 SDOperand APtr = basePtr.getOperand(0);
492 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
493 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
494 alignment = GSDN->getGlobal()->getAlignment();
// No constant offset available: rotation amount is just the preferred-slot
// correction (both fall-through paths set the same value).
499 prefSlotOffs = -vtm->prefslot_byte;
503 prefSlotOffs = -vtm->prefslot_byte;
506 if (alignment == 16) {
507 // Realign the base pointer as a D-Form address:
508 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
509 basePtr = DAG.getNode(ISD::ADD, PtrVT,
511 DAG.getConstant((alignOffs & ~0xf), PtrVT));
514 // Emit the vector load:
516 return DAG.getLoad(MVT::v16i8, chain, basePtr,
517 LSN->getSrcValue(), LSN->getSrcValueOffset(),
518 LSN->isVolatile(), 16);
521 // Unaligned load or we're using the "large memory" model, which means that
522 // we have to be very pessimistic:
523 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
524 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
528 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
529 DAG.getConstant((alignOffs & ~0xf), PtrVT));
530 was16aligned = false;
531 return DAG.getLoad(MVT::v16i8, chain, basePtr,
532 LSN->getSrcValue(), LSN->getSrcValueOffset(),
533 LSN->isVolatile(), 16);
536 /// Custom lower loads for CellSPU
538 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
539 within a 16-byte block, we have to rotate to extract the requested element.
542 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
543 LoadSDNode *LN = cast<LoadSDNode>(Op);
544 SDOperand the_chain = LN->getChain();
545 MVT::ValueType VT = LN->getMemoryVT();
546 MVT::ValueType OpVT = Op.Val->getValueType(0);
547 ISD::LoadExtType ExtType = LN->getExtensionType();
548 unsigned alignment = LN->getAlignment();
// Only plain (UNINDEXED) loads are custom lowered; pre/post-indexed modes
// fall through to the diagnostic below.
551 switch (LN->getAddressingMode()) {
552 case ISD::UNINDEXED: {
// Load the whole 16-byte chunk containing the value.
556 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
561 the_chain = result.getValue(1);
562 // Rotate the chunk if necessary
565 if (rotamt != 0 || !was16aligned) {
566 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// Rotation amount: a constant when the chunk was 16-aligned, otherwise it
// is computed from the (unaligned) base pointer at run time.
571 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
573 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
574 LoadSDNode *LN1 = cast<LoadSDNode>(result);
575 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
576 DAG.getConstant(rotamt, PtrVT));
579 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
580 the_chain = result.getValue(1);
583 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
585 MVT::ValueType vecVT = MVT::v16i8;
587 // Convert the loaded v16i8 vector to the appropriate vector type
588 // specified by the operand:
591 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
593 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
// Pull the scalar out of element 0 (the preferred slot), keeping the chain.
596 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
597 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
598 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
599 the_chain = result.getValue(1);
601 // Handle the sign and zero-extending loads for i1 and i8:
604 if (ExtType == ISD::SEXTLOAD) {
605 NewOpC = (OpVT == MVT::i1
606 ? SPUISD::EXTRACT_I1_SEXT
607 : SPUISD::EXTRACT_I8_SEXT);
609 assert(ExtType == ISD::ZEXTLOAD);
610 NewOpC = (OpVT == MVT::i1
611 ? SPUISD::EXTRACT_I1_ZEXT
612 : SPUISD::EXTRACT_I8_ZEXT);
615 result = DAG.getNode(NewOpC, OpVT, result);
// Wrap in LDRESULT so the value+chain pair survives as one node.
618 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
619 SDOperand retops[2] = {
624 result = DAG.getNode(SPUISD::LDRESULT, retvts,
625 retops, sizeof(retops) / sizeof(retops[0]));
// Unsupported addressing modes: report and fall out.
632 case ISD::LAST_INDEXED_MODE:
633 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
635 cerr << (unsigned) LN->getAddressingMode() << "\n";
643 /// Custom lower stores for CellSPU
645 All CellSPU stores are aligned to 16-byte boundaries, so for elements
646 within a 16-byte block, we have to generate a shuffle to insert the
647 requested element into its place, then store the resulting block.
650 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
651 StoreSDNode *SN = cast<StoreSDNode>(Op);
652 SDOperand Value = SN->getValue();
653 MVT::ValueType VT = Value.getValueType();
654 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
655 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
656 unsigned alignment = SN->getAlignment();
658 switch (SN->getAddressingMode()) {
659 case ISD::UNINDEXED: {
660 int chunk_offset, slot_offset;
663 // The vector type we really want to load from the 16-byte chunk, except
664 // in the case of MVT::i1, which has to be v16i8.
665 unsigned vecVT, stVecVT = MVT::v16i8;
668 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
669 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// Read-modify-write: load the containing 16-byte chunk first.
671 SDOperand alignLoadVec =
672 AlignedLoad(Op, DAG, ST, SN, alignment,
673 chunk_offset, slot_offset, VT, was16aligned);
675 if (alignLoadVec.Val == 0)
678 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
679 SDOperand basePtr = LN->getBasePtr();
680 SDOperand the_chain = alignLoadVec.getValue(1);
681 SDOperand theValue = SN->getValue();
685 && (theValue.getOpcode() == ISD::AssertZext
686 || theValue.getOpcode() == ISD::AssertSext)) {
687 // Drill down and get the value for zero- and sign-extended
689 theValue = theValue.getOperand(0);
694 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
695 SDOperand insertEltPtr;
696 SDOperand insertEltOp;
698 // If the base pointer is already a D-form address, then just create
699 // a new D-form address with a slot offset and the original base pointer.
700 // Otherwise generate a D-form address with the slot offset relative
701 // to the stack pointer, which is always aligned.
702 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
703 DEBUG(basePtr.Val->dump(&DAG));
706 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
707 (basePtr.getOpcode() == ISD::ADD
708 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
709 insertEltPtr = basePtr;
712 // $sp is always aligned, so use it when necessary to avoid loading
715 basePtr.Val->hasOneUse() ? DAG.getRegister(SPU::R1, PtrVT) : basePtr;
716 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, ptrP, insertEltOffs);
718 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
// Build the insertion mask and shuffle the new value into its slot within
// the loaded chunk, then store the whole chunk back.
722 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
723 result = DAG.getNode(SPUISD::SHUFB, vecVT,
724 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
726 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
728 result = DAG.getStore(the_chain, result, basePtr,
729 LN->getSrcValue(), LN->getSrcValueOffset(),
730 LN->isVolatile(), LN->getAlignment());
// NOTE(review): the diagnostic below says "LowerLOAD"/"LoadSDNode" but this
// is the store path with a StoreSDNode — message text is misleading (left
// unchanged here because it is runtime output).
739 case ISD::LAST_INDEXED_MODE:
740 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
742 cerr << (unsigned) SN->getAddressingMode() << "\n";
750 /// Generate the address of a constant pool entry.
// Static relocation + small memory model: a single A-form address suffices.
// Static + large memory model: materialize the address as Hi/Lo halves.
// Any other relocation model is unsupported (assertion text below).
752 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
753 MVT::ValueType PtrVT = Op.getValueType();
754 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
755 Constant *C = CP->getConstVal();
756 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
757 SDOperand Zero = DAG.getConstant(0, PtrVT);
758 const TargetMachine &TM = DAG.getTarget();
760 if (TM.getRelocationModel() == Reloc::Static) {
761 if (!ST->usingLargeMem()) {
762 // Just return the SDOperand with the constant pool address in it.
763 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
766 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
767 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
769 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
771 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, CPI, Zero);
777 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry.  Static relocation only:
// small memory model returns the A-form address directly; large memory
// model wraps it in an IndirectAddr node.
782 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
783 MVT::ValueType PtrVT = Op.getValueType();
784 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
785 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
786 SDOperand Zero = DAG.getConstant(0, PtrVT);
787 const TargetMachine &TM = DAG.getTarget();
789 if (TM.getRelocationModel() == Reloc::Static) {
790 SDOperand JmpAForm = DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
791 return (!ST->usingLargeMem()
793 : DAG.getNode(SPUISD::IndirectAddr, PtrVT, JmpAForm, Zero));
797 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global variable.  Mirrors LowerConstantPool:
// static + small memory model uses an A-form address; static + large memory
// model combines Hi/Lo halves via IndirectAddr; other relocation models are
// reported as unsupported.
802 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
803 MVT::ValueType PtrVT = Op.getValueType();
804 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
805 GlobalValue *GV = GSDN->getGlobal();
806 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
807 const TargetMachine &TM = DAG.getTarget();
808 SDOperand Zero = DAG.getConstant(0, PtrVT);
810 if (TM.getRelocationModel() == Reloc::Static) {
811 if (!ST->usingLargeMem()) {
812 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
814 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
815 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
816 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
819 cerr << "LowerGlobalAddress: Relocation model other than static not "
828 //! Custom lower i64 integer constants
830 This code inserts all of the necessary juggling that needs to occur to load
831 a 64-bit constant into a register.
834 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
835 unsigned VT = Op.getValueType();
836 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
838 if (VT == MVT::i64) {
// Splat the constant into both lanes of a v2i64, then extract element 0
// (the preferred slot) back out as the scalar result.
839 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
840 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
841 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Non-i64 types are not expected here; report them.
844 cerr << "LowerConstant: unhandled constant type "
845 << MVT::getValueTypeString(VT)
854 //! Custom lower single precision floating point constants
856 "float" immediates can be lowered as if they were unsigned 32-bit integers.
857 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
// f64 constants are bit-cast to an i64 constant and routed through
// LowerConstant's v2i64 splat/extract sequence.
861 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
862 unsigned VT = Op.getValueType();
863 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
866 "LowerConstantFP: Node is not ConstantFPSDNode");
868 if (VT == MVT::f32) {
869 float targetConst = FP->getValueAPF().convertToFloat();
870 return DAG.getNode(SPUISD::SFPConstant, VT,
871 DAG.getTargetConstantFP(targetConst, VT));
872 } else if (VT == MVT::f64) {
873 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
874 return DAG.getNode(ISD::BIT_CONVERT, VT,
875 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
881 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
// Rebuilds the BRCOND with its condition operand (operand 1) zero-extended
// to a register-friendly width; all other condition types pass through
// unchanged (empty SDOperand signals "no change" to the legalizer).
883 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
885 SDOperand Cond = Op.getOperand(1);
886 MVT::ValueType CondVT = Cond.getValueType();
887 MVT::ValueType CondNVT;
889 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
890 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
891 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
893 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
896 return SDOperand(); // Unchanged
//! Lower ISD::FORMAL_ARGUMENTS: materialize this function's incoming args.
//
// For each argument value produced by the FORMAL_ARGUMENTS node (all results
// except the trailing chain), either copy it out of the next SPU argument
// register through a fresh virtual register of the matching register class,
// or load it from a fixed stack slot at ArgOffset when registers are not
// used for it. For varargs functions, the remaining argument registers are
// spilled to sequential stack slots so va_arg can walk them, and
// VarArgsFrameIndex is recorded for llvm.va_start expansion.
//
// NOTE(review): switch/case labels, ArgRegIdx increments, needsLoad updates
// and closing braces are on elided lines of this listing; the per-type
// comments below describe the register class visibly used in each arm.
900 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
902 MachineFunction &MF = DAG.getMachineFunction();
903 MachineFrameInfo *MFI = MF.getFrameInfo();
904 MachineRegisterInfo &RegInfo = MF.getRegInfo();
905 SmallVector<SDOperand, 8> ArgValues;
906 SDOperand Root = Op.getOperand(0);
907 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
// SPU argument-passing registers, as defined by SPURegisterInfo.
909 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
910 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack-passed arguments begin just above the minimal frame ([LR]/[SP]).
912 unsigned ArgOffset = SPUFrameInfo::minStackSize();
913 unsigned ArgRegIdx = 0;
914 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
916 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
918 // Add DAG nodes to load the arguments or copy them out of registers.
919 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
921 bool needsLoad = false;
922 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
923 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
// Diagnostic for argument types the (elided) switch does not handle:
927 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
928 << MVT::getValueTypeString(ObjectVT)
// i8 arguments arrive in an R8C-class register (case label elided):
933 if (!isVarArg && ArgRegIdx < NumArgRegs) {
934 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
935 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
936 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 arguments -> R16C register class:
943 if (!isVarArg && ArgRegIdx < NumArgRegs) {
944 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
945 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
946 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 arguments -> R32C register class:
953 if (!isVarArg && ArgRegIdx < NumArgRegs) {
954 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
955 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
956 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 arguments -> R64C register class:
963 if (!isVarArg && ArgRegIdx < NumArgRegs) {
964 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
965 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
966 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 arguments -> R32FP register class:
973 if (!isVarArg && ArgRegIdx < NumArgRegs) {
974 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
975 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
976 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 arguments -> R64FP register class:
983 if (!isVarArg && ArgRegIdx < NumArgRegs) {
984 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
985 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
986 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments -> VECREG register class (copied at their own type):
997 if (!isVarArg && ArgRegIdx < NumArgRegs) {
998 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
999 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1000 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1008 // We need to load the argument to a virtual register if we determined above
1009 // that we ran out of physical registers of the appropriate type
1011 // If the argument is actually used, emit a load from the right stack
// slot; otherwise materialize an UNDEF so no dead load is emitted.
1013 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
1014 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1015 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1016 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1018 // Don't emit a dead load.
1019 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
1022 ArgOffset += StackSlotSize;
1025 ArgValues.push_back(ArgVal);
1028 // If the function takes variable number of arguments, make a frame index for
1029 // the start of the first vararg value... for expansion of llvm.va_start.
1031 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1033 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1034 // If this function is vararg, store any remaining integer argument regs to
1035 // their spots on the stack so that they may be loaded by dereferencing the
1036 // result of va_next.
1037 SmallVector<SDOperand, 8> MemOps;
1038 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1039 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1040 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1041 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1042 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1043 MemOps.push_back(Store);
1044 // Increment the address by the pointer size for the next arg to store
1045 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1046 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all vararg spill stores into the chain with a TokenFactor.
1048 if (!MemOps.empty())
1049 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
// The final result value is the (possibly updated) chain.
1052 ArgValues.push_back(Root);
1054 // Return the new list of results.
1055 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1056 Op.Val->value_end());
1057 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1060 /// isLSAAddress - Return the immediate to use if the specified
1061 /// value is representable as a LSA address.
//
// A local-store address must be word-aligned (low 2 bits zero) and its top
// 14 bits must be the sign-extension of the remaining immediate; otherwise
// 0 is returned. On success, returns an i32 constant node holding the
// word address (byte address >> 2).
1062 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1063 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// NOTE(review): the non-constant early-out is on an elided line.
1066 int Addr = C->getValue();
1067 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1068 (Addr << 14 >> 14) != Addr)
1069 return 0; // Top 14 bits have to be sext of immediate.
// Drop the two implicit low bits: the immediate is a word address.
1071 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
//! Lower ISD::CALL for the SPU target.
//
// Splits outgoing arguments between the SPU argument registers (RegsToPass)
// and stack slots (MemOpChains), wraps the region in
// CALLSEQ_START/CALLSEQ_END, rewrites the callee into a target address node
// (BRSL/BRASL-style PC-relative or A-form for small memory models, X-form
// indirect for "large memory" mode), emits the SPUISD::CALL node, and
// finally copies any return value out of R3 (and R4 for the two-i32 case).
//
// NOTE(review): several lines are elided in this listing — switch/case
// labels for the argument and return-value types, `else` keywords, the
// InFlag declaration, the NumResults increments, and closing braces.
1076 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1077 SDOperand Chain = Op.getOperand(0);
1079 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1080 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1082 SDOperand Callee = Op.getOperand(4);
// Arguments come in (value, flag) pairs after the first five operands.
1083 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1084 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1085 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1086 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1088 // Handy pointer type
1089 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1091 // Accumulate how many bytes are to be pushed on the stack, including the
1092 // linkage area, and parameter passing area. According to the SPU ABI,
1093 // we minimally need space for [LR] and [SP]
1094 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1096 // Set up a copy of the stack pointer for use loading and storing any
1097 // arguments that may not fit in the registers available for argument
1099 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1101 // Figure out which arguments are going to go in registers, and which in
1103 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1104 unsigned ArgRegIdx = 0;
1106 // Keep track of registers passing arguments
1107 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1108 // And the arguments passed on the stack
1109 SmallVector<SDOperand, 8> MemOpChains;
1111 for (unsigned i = 0; i != NumOps; ++i) {
1112 SDOperand Arg = Op.getOperand(5+2*i);
1114 // PtrOff will be used to store the current argument to the stack if a
1115 // register cannot be found for it.
1116 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1117 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1119 switch (Arg.getValueType()) {
1120 default: assert(0 && "Unexpected ValueType for argument!");
// First case group (type labels elided): register if one remains,
// otherwise store the argument to its stack slot.
1124 if (ArgRegIdx != NumArgRegs) {
1125 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1127 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1128 ArgOffset += StackSlotSize;
// Second case group (labels elided) — same register-else-stack policy:
1133 if (ArgRegIdx != NumArgRegs) {
1134 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1136 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1137 ArgOffset += StackSlotSize;
// Third case group (labels elided) — same register-else-stack policy:
1144 if (ArgRegIdx != NumArgRegs) {
1145 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1147 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1148 ArgOffset += StackSlotSize;
1154 // Update number of stack bytes actually used, insert a call sequence start
1155 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1156 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1158 if (!MemOpChains.empty()) {
1159 // Adjust the stack pointer for the stack arguments.
1160 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1161 &MemOpChains[0], MemOpChains.size());
1164 // Build a sequence of copy-to-reg nodes chained together with token chain
1165 // and flag operands which copy the outgoing args into the appropriate regs.
1167 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1168 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1170 InFlag = Chain.getValue(1);
1173 std::vector<MVT::ValueType> NodeTys;
1174 NodeTys.push_back(MVT::Other); // Returns a chain
1175 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1177 SmallVector<SDOperand, 8> Ops;
1178 unsigned CallOpc = SPUISD::CALL;
1180 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1181 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1182 // node so that legalize doesn't hack it.
1183 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1184 GlobalValue *GV = G->getGlobal();
1185 unsigned CalleeVT = Callee.getValueType();
1186 SDOperand Zero = DAG.getConstant(0, PtrVT);
1187 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1189 if (!ST->usingLargeMem()) {
1190 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1191 // style calls, otherwise, external symbols are BRASL calls. This assumes
1192 // that declared/defined symbols are in the same compilation unit and can
1193 // be reached through PC-relative jumps.
1196 // This may be an unsafe assumption for JIT and really large compilation
1198 if (GV->isDeclaration()) {
// Declarations (no body here): absolute A-form address.
1199 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
// Definitions: PC-relative address (else branch, keyword elided).
1201 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1204 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1206 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1208 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1209 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1210 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1211 // If this is an absolute destination address that appears to be a legal
1212 // local store address, use the munged value.
1213 Callee = SDOperand(Dest, 0);
1216 Ops.push_back(Chain);
1217 Ops.push_back(Callee);
1219 // Add argument registers to the end of the list so that they are known live
1221 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1222 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1223 RegsToPass[i].second.getValueType()));
// Glue the copy-to-reg flag into the call (guard on InFlag elided).
1226 Ops.push_back(InFlag);
1227 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1228 InFlag = Chain.getValue(1);
1230 SDOperand ResultVals[3];
1231 unsigned NumResults = 0;
1234 // If the call has results, copy the values out of the ret val registers.
1235 switch (Op.Val->getValueType(0)) {
1236 default: assert(0 && "Unexpected ret value!");
1237 case MVT::Other: break;
// i32 results (case label elided): a pair of i32s comes back in R4:R3.
1239 if (Op.Val->getValueType(1) == MVT::i32) {
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1241 ResultVals[0] = Chain.getValue(0);
1242 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1243 Chain.getValue(2)).getValue(1);
1244 ResultVals[1] = Chain.getValue(0);
1246 NodeTys.push_back(MVT::i32);
// Single i32 result: copied out of R3 (else branch, keyword elided).
1248 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1249 ResultVals[0] = Chain.getValue(0);
1252 NodeTys.push_back(MVT::i32);
// i64 result (case label elided): copied out of R3.
1255 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1256 ResultVals[0] = Chain.getValue(0);
1258 NodeTys.push_back(MVT::i64);
// FP results (case labels elided): copied out of R3 at the node's type.
1262 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1263 InFlag).getValue(1);
1264 ResultVals[0] = Chain.getValue(0);
1266 NodeTys.push_back(Op.Val->getValueType(0));
// Vector results (case labels elided): likewise returned in R3.
1273 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1274 InFlag).getValue(1);
1275 ResultVals[0] = Chain.getValue(0);
1277 NodeTys.push_back(Op.Val->getValueType(0));
// Close the call sequence; the stack adjustment mirrors CALLSEQ_START.
1281 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1282 DAG.getConstant(NumStackBytes, PtrVT));
1283 NodeTys.push_back(MVT::Other);
1285 // If the function returns void, just return the chain.
1286 if (NumResults == 0)
1289 // Otherwise, merge everything together with a MERGE_VALUES node.
1290 ResultVals[NumResults++] = Chain;
1291 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1292 ResultVals, NumResults);
1293 return Res.getValue(Op.ResNo);
//! Lower ISD::RET: copy return values into the ABI return registers.
//
// Runs the SPU return calling convention (RetCC_SPU) over the node, marks
// the result registers live-out (once per function), emits CopyToReg for
// each returned value, and terminates with SPUISD::RET_FLAG — glued to the
// last copy when there were return values, bare otherwise.
// NOTE(review): the Flag variable's declaration and the final if/else
// keywords are on elided lines of this listing.
1297 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1298 SmallVector<CCValAssign, 16> RVLocs;
1299 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1300 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1301 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1302 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1304 // If this is the first return lowered for this function, add the regs to the
1305 // liveout set for the function.
1306 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1307 for (unsigned i = 0; i != RVLocs.size(); ++i)
1308 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1311 SDOperand Chain = Op.getOperand(0);
1314 // Copy the result values into the output registers.
1315 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1316 CCValAssign &VA = RVLocs[i];
1317 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, val0, sig0, val1, sig1, ...) — hence i*2+1.
1318 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1319 Flag = Chain.getValue(1);
// With return values: glue the flag so the copies stay with the return.
1323 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
// Void return: no flag operand.
1325 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1329 //===----------------------------------------------------------------------===//
1330 // Vector related lowering:
1331 //===----------------------------------------------------------------------===//
//! If N is a BUILD_VECTOR whose non-undef elements are all the same
//! ConstantSDNode, return that constant; otherwise return 0.
//! An all-UNDEF vector also returns 0 (callers use an implicit def).
1333 static ConstantSDNode *
1334 getVecImm(SDNode *N) {
1335 SDOperand OpVal(0, 0);
1337 // Check to see if this buildvec has a single non-undef value in its elements.
1338 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1339 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// First non-undef element seen (guard on an elided line):
1341 OpVal = N->getOperand(i);
1342 else if (OpVal != N->getOperand(i))
1346 if (OpVal.Val != 0) {
1347 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1352 return 0; // All UNDEF: use implicit def.; not Constant node
1355 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1356 /// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant as an SDOperand; otherwise return an empty SDOperand (the
/// closing return is on an elided line of this listing).
1358 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1359 MVT::ValueType ValueType) {
1360 if (ConstantSDNode *CN = getVecImm(N)) {
1361 uint64_t Value = CN->getValue();
// 0x3ffff == 2^18 - 1: the largest unsigned 18-bit immediate.
1362 if (Value <= 0x3ffff)
1363 return DAG.getConstant(Value, ValueType);
1369 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1370 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant as an SDOperand. The sign-extension round-trip check is done
/// at the width of ValueType (i32, i16 or i64); a failed match falls
/// through to an elided empty-SDOperand return.
1372 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1373 MVT::ValueType ValueType) {
1374 if (ConstantSDNode *CN = getVecImm(N)) {
1375 if (ValueType == MVT::i32) {
1376 int Value = (int) CN->getValue();
// Sign-extend the low 16 bits and compare with the full value.
1377 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1379 if (Value == SExtValue)
1380 return DAG.getConstant(Value, ValueType);
1381 } else if (ValueType == MVT::i16) {
1382 short Value = (short) CN->getValue();
1383 int SExtValue = ((int) Value << 16) >> 16;
1385 if (Value == (short) SExtValue)
1386 return DAG.getConstant(Value, ValueType);
1387 } else if (ValueType == MVT::i64) {
1388 int64_t Value = CN->getValue();
1389 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1391 if (Value == SExtValue)
1392 return DAG.getConstant(Value, ValueType);
1399 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1400 /// and the value fits into a signed 10-bit constant, and if so, return the
/// constant as an SDOperand (empty-SDOperand fallthrough is elided).
/// The SPU's i10 immediate forms accept i32 and i16 operands here.
1402 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1403 MVT::ValueType ValueType) {
1404 if (ConstantSDNode *CN = getVecImm(N)) {
1405 int Value = (int) CN->getValue();
1406 if ((ValueType == MVT::i32 && isS10Constant(Value))
1407 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1408 return DAG.getConstant(Value, ValueType);
1414 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1415 /// and the value fits into a signed 8-bit constant, and if so, return the
/// constant as an SDOperand (empty-SDOperand fallthrough is elided).
1418 /// @note: The incoming vector is v16i8 because that's the only way we can load
1419 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same (for i16) before accepting the low byte as the immediate.
1421 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1422 MVT::ValueType ValueType) {
1423 if (ConstantSDNode *CN = getVecImm(N)) {
1424 int Value = (int) CN->getValue();
1425 if (ValueType == MVT::i16
1426 && Value <= 0xffff /* truncated from uint64_t */
1427 && ((short) Value >> 8) == ((short) Value & 0xff))
1428 return DAG.getConstant(Value & 0xff, ValueType);
1429 else if (ValueType == MVT::i8
1430 && (Value & 0xff) == Value)
1431 return DAG.getConstant(Value, ValueType);
1437 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1438 /// and the value fits into a signed 16-bit constant, and if so, return the
/// upper halfword (Value >> 16) as the immediate for an ILHU-style
/// "immediate load halfword upper". Requires the low 16 bits to be zero.
/// (Empty-SDOperand fallthrough is on an elided line.)
1440 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1441 MVT::ValueType ValueType) {
1442 if (ConstantSDNode *CN = getVecImm(N)) {
1443 uint64_t Value = CN->getValue();
1444 if ((ValueType == MVT::i32
1445 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1446 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1447 return DAG.getConstant(Value >> 16, ValueType);
1453 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
/// Returns the splatted value as an i32 constant, or (on an elided line)
/// an empty SDOperand when the vector is not a uniform constant.
1454 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1455 if (ConstantSDNode *CN = getVecImm(N)) {
1456 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1462 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
/// Returns the splatted value as an i64 constant, or (on an elided line)
/// an empty SDOperand when the vector is not a uniform constant.
/// NOTE(review): the value is narrowed through (unsigned) before being
/// made an i64 constant — confirm whether the truncation is intended.
1463 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1464 if (ConstantSDNode *CN = getVecImm(N)) {
1465 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1471 // If this is a vector of constants or undefs, get the bits. A bit in
1472 // UndefBits is set if the corresponding element of the vector is an
1473 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1474 // zero. Return true if this is not an array of constants, false if it is.
1476 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1477 uint64_t UndefBits[2]) {
1478 // Start with zero'd results.
1479 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
// Elements are assumed uniform in size; take the width of element 0.
1481 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1482 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1483 SDOperand OpVal = BV->getOperand(i);
1485 unsigned PartNo = i >= e/2; // In the upper uint64_t (VectorBits[1])?
1486 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1488 uint64_t EltBits = 0;
1489 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit of this element's lane in the undef mask.
1490 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1491 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1493 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1494 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1495 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements contribute their raw IEEE-754 bit patterns.
1496 const APFloat &apf = CN->getValueAPF();
1497 EltBits = (CN->getValueType(0) == MVT::f32
1498 ? FloatToBits(apf.convertToFloat())
1499 : DoubleToBits(apf.convertToDouble()));
1501 // Nonconstant element.
1505 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1508 //printf("%llx %llx %llx %llx\n",
1509 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1513 /// If this is a splat (repetition) of a value across the whole vector, return
1514 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1515 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1516 /// SplatSize = 1 byte.
/// Works by repeatedly folding the 128-bit value in half (64 -> 32 -> 16
/// -> 8 bits), treating undef bits as wildcards at every step, and stops
/// at MinSplatBits, the smallest splat width the caller will accept.
/// NOTE(review): the SplatSize assignments and several closing braces are
/// on elided lines of this listing.
1517 static bool isConstantSplat(const uint64_t Bits128[2],
1518 const uint64_t Undef128[2],
1520 uint64_t &SplatBits, uint64_t &SplatUndef,
1522 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1523 // the same as the lower 64-bits, ignoring undefs.
1524 uint64_t Bits64 = Bits128[0] | Bits128[1];
1525 uint64_t Undef64 = Undef128[0] & Undef128[1];
1526 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1527 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1528 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1529 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1531 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1532 if (MinSplatBits < 64) {
1534 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1536 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1537 if (MinSplatBits < 32) {
1539 // If the top 16-bits are different than the lower 16-bits, ignoring
1540 // undefs, we have an i32 splat.
1541 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1542 if (MinSplatBits < 16) {
1543 // If the top 8-bits are different than the lower 8-bits, ignoring
1544 // undefs, we have an i16 splat.
1545 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1546 // Otherwise, we have an 8-bit splat.
1547 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1548 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// 16-bit splat (enclosing else / SplatBits assignment elided):
1554 SplatUndef = Undef16;
// 32-bit splat (enclosing else / SplatBits assignment elided):
1561 SplatUndef = Undef32;
// 64-bit (or wider) splat: halves of the 128-bit value agree.
1567 SplatBits = Bits128[0];
1568 SplatUndef = Undef128[0];
1574 return false; // Can't be a splat if two pieces don't match.
1577 // If this is a case we can't handle, return null and let the default
1578 // expansion code take care of it. If we CAN select this case, and if it
1579 // selects to a single instruction, return Op. Otherwise, if we can codegen
1580 // this case more efficiently than a constant pool load, lower it to the
1581 // sequence of ops that should be used.
//
// NOTE(review): this listing elides the switch over VT and its case
// labels; the per-type arms below are annotated with the vector type
// their MVT constants make evident.
1582 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1583 MVT::ValueType VT = Op.getValueType();
1584 // If this is a vector of constants or undefs, get the bits. A bit in
1585 // UndefBits is set if the corresponding element of the vector is an
1586 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1588 uint64_t VectorBits[2];
1589 uint64_t UndefBits[2];
1590 uint64_t SplatBits, SplatUndef;
// Bail out unless the vector is a uniform constant splat.
1592 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1593 || !isConstantSplat(VectorBits, UndefBits,
1594 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1595 SplatBits, SplatUndef, SplatSize))
1596 return SDOperand(); // Not a constant vector, not a splat.
// v4f32 arm (case label elided): splat the 32-bit pattern as integers.
1601 uint32_t Value32 = SplatBits;
1602 assert(SplatSize == 4
1603 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1604 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1605 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1606 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1607 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 arm (case label elided): same trick at 64-bit width.
1611 uint64_t f64val = SplatBits;
1612 assert(SplatSize == 8
1613 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1614 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1615 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1616 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1617 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 arm (case label elided):
1621 // 8-bit constants have to be expanded to 16-bits
1622 unsigned short Value16 = SplatBits | (SplatBits << 8);
1624 for (int i = 0; i < 8; ++i)
1625 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1626 return DAG.getNode(ISD::BIT_CONVERT, VT,
1627 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 arm (case label elided): duplicate byte splats into halfwords.
1630 unsigned short Value16;
1632 Value16 = (unsigned short) (SplatBits & 0xffff);
1634 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1635 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1637 for (int i = 0; i < 8; ++i) Ops[i] = T;
1638 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 arm (case label elided):
1641 unsigned int Value = SplatBits;
1642 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1643 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 arm (case label elided): split into upper/lower 32-bit halves.
1646 uint64_t val = SplatBits;
1647 uint32_t upper = uint32_t(val >> 32);
1648 uint32_t lower = uint32_t(val);
1653 SmallVector<SDOperand, 16> ShufBytes;
1655 bool upper_special, lower_special;
1657 // NOTE: This code creates common-case shuffle masks that can be easily
1658 // detected as common expressions. It is not attempting to create highly
1659 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1661 // Detect if the upper or lower half is a special shuffle mask pattern:
1662 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1663 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1665 // Create lower vector if not a special pattern
1666 if (!lower_special) {
1667 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1668 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1669 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1670 LO32C, LO32C, LO32C, LO32C));
1673 // Create upper vector if not a special pattern
1674 if (!upper_special) {
1675 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1676 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1677 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1678 HI32C, HI32C, HI32C, HI32C));
1681 // If either upper or lower are special, then the two input operands are
1682 // the same (basically, one of them is a "don't care")
1687 if (lower_special && upper_special) {
1688 // Unhappy situation... both upper and lower are special, so punt with
1689 // a target constant:
1690 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1691 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shufb control mask, word by word (i) and byte by
// byte (j). Even words (i & 1) == 0 select from the upper half, odd
// words from the lower half.
1695 for (int i = 0; i < 4; ++i) {
1696 for (int j = 0; j < 4; ++j) {
1698 bool process_upper, process_lower;
1701 process_upper = (upper_special && (i & 1) == 0);
1702 process_lower = (lower_special && (i & 1) == 1);
1704 if (process_upper || process_lower) {
// shufb special byte codes: 0x80 -> 0x00, 0xc0 -> 0xff, 0xe0 -> 0x80
// (value assignments for the first two cases are on elided lines).
1705 if ((process_upper && upper == 0)
1706 || (process_lower && lower == 0))
1708 else if ((process_upper && upper == 0xffffffff)
1709 || (process_lower && lower == 0xffffffff))
1711 else if ((process_upper && upper == 0x80000000)
1712 || (process_lower && lower == 0x80000000))
1713 val = (j == 0 ? 0xe0 : 0x80);
// Ordinary byte: index into the concatenated HI32:LO32 pair.
1715 val = i * 4 + j + ((i & 1) * 16);
1717 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1721 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1722 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1723 &ShufBytes[0], ShufBytes.size()));
1725 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1726 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1727 return DAG.getNode(ISD::BIT_CONVERT, VT,
1728 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1729 Zero, Zero, Zero, Zero));
1737 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1738 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1739 /// permutation vector, V3, is monotonically increasing with one "exception"
1740 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1741 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1742 /// In either case, the net result is going to eventually invoke SHUFB to
1743 /// permute/shuffle the bytes from V1 and V2.
1745 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1746 /// control word for byte/halfword/word insertion. This takes care of a single
1747 /// element move from V2 into V1.
1749 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
/// NOTE(review): V2EltIdx0 assignments, loop increments and some braces are
/// on elided lines of this listing.
1750 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1751 SDOperand V1 = Op.getOperand(0);
1752 SDOperand V2 = Op.getOperand(1);
1753 SDOperand PermMask = Op.getOperand(2);
// An undef second operand degenerates to shuffling V1 with itself.
1755 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1757 // If we have a single element being moved from V1 to V2, this can be handled
1758 // using the C*[DX] compute mask instructions, but the vector elements have
1759 // to be monotonically increasing with one exception element.
1760 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1761 unsigned EltsFromV2 = 0;
1763 unsigned V2EltIdx0 = 0;
1764 unsigned CurrElt = 0;
1765 bool monotonic = true;
// V2EltIdx0 = first mask index that refers into V2 (assignments elided):
1766 if (EltVT == MVT::i8)
1768 else if (EltVT == MVT::i16)
1770 else if (EltVT == MVT::i32)
1773 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; stop early once >1 element comes from V2 or order breaks.
1775 for (unsigned i = 0, e = PermMask.getNumOperands();
1776 EltsFromV2 <= 1 && monotonic && i != e;
1779 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1782 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1784 if (SrcElt >= V2EltIdx0) {
// Element sourced from V2: remember its byte offset for INSERT_MASK.
1786 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1787 } else if (CurrElt != SrcElt) {
// Out-of-order V1 element: not monotonic (body elided).
1794 if (EltsFromV2 == 1 && monotonic) {
1795 // Compute mask and shuffle
1796 MachineFunction &MF = DAG.getMachineFunction();
1797 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1798 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1799 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1800 // Initialize temporary register to 0
1801 SDOperand InitTempReg =
1802 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1803 // Copy register's contents as index in INSERT_MASK:
1804 SDOperand ShufMaskOp =
1805 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1806 DAG.getTargetConstant(V2Elt, MVT::i32),
1807 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1808 // Use shuffle mask in SHUFB synthetic instruction:
1809 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
// General case: build an explicit v16i8 byte-permutation mask for shufb.
1811 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1812 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1814 SmallVector<SDOperand, 16> ResultMask;
1815 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1817 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1820 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1822 for (unsigned j = 0; j != BytesPerElement; ++j) {
1823 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1828 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1829 &ResultMask[0], ResultMask.size());
1830 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
//! Lower ISD::SCALAR_TO_VECTOR.
//
// A constant scalar is expanded into a BUILD_VECTOR of n identical copies
// (which later simplifies to a vector register load). Any other scalar is
// broadcast into the vector with the SPUISD::PROMOTE_SCALAR pseudo node.
// NOTE(review): the declarations of n_copies/VT and some braces/case
// labels of the second switch are on elided lines of this listing.
1834 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1835 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1837 if (Op0.Val->getOpcode() == ISD::Constant) {
1838 // For a constant, build the appropriate constant vector, which will
1839 // eventually simplify to a vector register load.
1841 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1842 SmallVector<SDOperand, 16> ConstVecValues;
1846 // Create a constant vector:
1847 switch (Op.getValueType()) {
1848 default: assert(0 && "Unexpected constant value type in "
1849 "LowerSCALAR_TO_VECTOR");
1850 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1851 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1852 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1853 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1854 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1855 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element:
1858 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1859 for (size_t j = 0; j < n_copies; ++j)
1860 ConstVecValues.push_back(CValue);
1862 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1863 &ConstVecValues[0], ConstVecValues.size());
1865 // Otherwise, copy the value from one register to another:
1866 switch (Op0.getValueType()) {
1867 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// Supported scalar types (case labels elided) broadcast via
// PROMOTE_SCALAR; the scalar is passed as both operands.
1874 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Lower a vector ISD::MUL node for the Cell SPU.
/*!
  The SPU ISA has no full-width vector multiply instruction, so the
  product is synthesized from 16-bit partial multiplies
  (SPUISD::MPY/MPYH/MPYU/MPYHH), switched on the result type:
   - v4i32: rA*rB = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA)
   - v8i16: lower and upper halves multiplied separately, merged via SELB
   - v16i8: operands split and sign-extended to 16 bits, multiplied as
     v8i16, and the intermediate products reassembled
  NOTE(review): this is an elided view of the file — several case labels
  and closing braces are not visible here, so the inline commentary is
  limited to what the visible code demonstrates.
 */
1881 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1882 switch (Op.getValueType()) {
// --- v4i32 case: sum the unsigned low product with both "high" partial
// products (MPYH applied in both argument orders covers both operands'
// upper halves).
1884 SDOperand rA = Op.getOperand(0);
1885 SDOperand rB = Op.getOperand(1);
1886 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1887 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1888 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1889 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1891 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1895 // Multiply two v8i16 vectors (pipeline friendly version):
1896 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1897 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1898 // c) Use SELB to select upper and lower halves from the intermediate results
1900 // NOTE: We really want to move the FSMBI to earlier to actually get the
1901 // dual-issue. This code does manage to do this, even if it's a little on
// The FSMBI mask (0xcccc) and the high product are pinned in virtual
// registers via CopyToReg/CopyFromReg so the scheduler can hoist the
// FSMBI ahead of the multiplies.
1904 MachineFunction &MF = DAG.getMachineFunction();
1905 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1906 SDOperand Chain = Op.getOperand(0);
1907 SDOperand rA = Op.getOperand(0);
1908 SDOperand rB = Op.getOperand(1);
1909 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1910 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1913 DAG.getCopyToReg(Chain, FSMBIreg,
1914 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1915 DAG.getConstant(0xcccc, MVT::i32)));
1918 DAG.getCopyToReg(FSMBOp, HiProdReg,
1919 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1921 SDOperand HHProd_v4i32 =
1922 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1923 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// SELB picks lower halves from MPY(rA,rB) and upper halves from the
// shifted MPYHH product, steered by the 0xcccc FSMBI mask.
1925 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1926 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1927 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1928 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1930 DAG.getConstant(16, MVT::i16))),
1931 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1934 // This M00sE is N@stI! (apologies to Monty Python)
1936 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1937 // is to break it all apart, sign extend, and reassemble the various
1938 // intermediate products.
1940 MachineFunction &MF = DAG.getMachineFunction();
1941 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1942 SDOperand Chain = Op.getOperand(0);
1943 SDOperand rA = Op.getOperand(0);
1944 SDOperand rB = Op.getOperand(1);
1945 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1946 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1948 unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1949 unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1950 unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// Low-low product: treat both operands as v8i16 and multiply.
1953 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1954 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1955 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Sign-extend the high bytes of each halfword (arithmetic shift right
// by 8), multiply, then shift the product back up by 8.
1957 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1959 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1962 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1963 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// 0x2222 FSMBI mask selects alternating bytes when merging partial
// products below.
1965 SDOperand FSMBdef_2222 =
1966 DAG.getCopyToReg(Chain, FSMBreg_2222,
1967 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1968 DAG.getConstant(0x2222, MVT::i32)));
1970 SDOperand FSMBuse_2222 =
1971 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1973 SDOperand LoProd_1 =
1974 DAG.getCopyToReg(Chain, LoProd_reg,
1975 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
// Mask the merged low product down to the low 16 bits of each word.
1978 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1981 DAG.getNode(ISD::AND, MVT::v4i32,
1982 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1983 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1984 LoProdMask, LoProdMask,
1985 LoProdMask, LoProdMask));
// Upper-halfword operands: arithmetic shift each word right by 16.
1988 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1992 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1993 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1996 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1997 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1998 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2000 SDOperand HHProd_1 =
2001 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2002 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2003 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2004 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2005 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2008 DAG.getCopyToReg(Chain, HiProd_reg,
2009 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2011 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
// Shift the high product into the upper 16 bits of each word, then OR
// with the masked low product and bitcast back to v16i8.
2015 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
2016 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
2018 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2019 DAG.getNode(ISD::OR, MVT::v4i32,
// Default: unsupported vector type — report and (presumably) abort;
// the abort/brace lines are not visible in this view.
2024 cerr << "CellSPU: Unknown vector multiplication, got "
2025 << MVT::getValueTypeString(Op.getValueType())
//! Lower f32 (and v4f32) ISD::FDIV via reciprocal estimate.
/*!
  SPU has no FP divide instruction. A/B is computed as:
     BRcpl  = FPInterp(B, FPRecipEst(B))   // refined estimate of 1/B
     C      = A * BRcpl                    // first quotient approximation
     result = C + BRcpl * (A - B * C)      // one Newton-Raphson-style
                                           // correction step
  The intermediate values are pinned in virtual registers with
  CopyToReg/CopyFromReg.
 */
2034 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2035 MachineFunction &MF = DAG.getMachineFunction();
2036 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2038 SDOperand A = Op.getOperand(0);
2039 SDOperand B = Op.getOperand(1);
2040 unsigned VT = Op.getValueType();
2042 unsigned VRegBR, VRegC;
// Scalar f32 uses the FP register class; the (elided) else branch
// handles the vector form with VECREG.
2044 if (VT == MVT::f32) {
2045 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2046 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2048 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2049 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2051 // TODO: make sure we're feeding FPInterp the right arguments
2052 // Right now: fi B, frest(B)
2055 // (Floating Interpolate (FP Reciprocal Estimate B))
2057 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2058 DAG.getNode(SPUISD::FPInterp, VT, B,
2059 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2061 // Computes A * BRcpl and stores in a temporary register
2063 DAG.getCopyToReg(BRcpl, VRegC,
2064 DAG.getNode(ISD::FMUL, VT, A,
2065 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2066 // What's the Chain variable do? It's magic!
2067 // TODO: set Chain = Op(0).getEntryNode()
// Final correction: C + BRcpl * (A - B*C).
2069 return DAG.getNode(ISD::FADD, VT,
2070 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2071 DAG.getNode(ISD::FMUL, VT,
2072 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2073 DAG.getNode(ISD::FSUB, VT, A,
2074 DAG.getNode(ISD::FMUL, VT, B,
2075 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Lower ISD::EXTRACT_VECTOR_ELT for a constant element index.
/*!
  On SPU a scalar lives in a type-dependent "preferred slot" of a
  128-bit register. Element 0 of an i32/i64 vector is already in the
  preferred slot, so it is extracted directly with EXTRACT_ELT0.
  Otherwise a SHUFB mask is built that rotates the requested element's
  bytes into the preferred slot, and EXTRACT_ELT0 is applied to the
  shuffled vector.
  NOTE(review): the index range checks below are asserts, so in NDEBUG
  builds an out-of-range index proceeds unchecked. Also, the i32/i64
  assert messages are off by one ("slot > 4"/"slot > 2" for EltNo >= 4
  and >= 2 — the maximum valid slots are 3 and 1 respectively).
 */
2078 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2079 unsigned VT = Op.getValueType();
2080 SDOperand N = Op.getOperand(0);
2081 SDOperand Elt = Op.getOperand(1);
2082 SDOperand ShufMask[16];
2083 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2085 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2087 int EltNo = (int) C->getValue();
2090 if (VT == MVT::i8 && EltNo >= 16)
2091 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2092 else if (VT == MVT::i16 && EltNo >= 8)
2093 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2094 else if (VT == MVT::i32 && EltNo >= 4)
2095 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2096 else if (VT == MVT::i64 && EltNo >= 2)
2097 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2099 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2100 // i32 and i64: Element 0 is the preferred slot
2101 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2104 // Need to generate shuffle mask and extract:
2105 int prefslot_begin = -1, prefslot_end = -1;
2106 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte ranges per scalar type (the switch's case labels
// are elided in this view): i8 -> byte 3, i16 -> bytes 2-3,
// i32 -> bytes 0-3, i64 -> bytes 0-7.
2110 prefslot_begin = prefslot_end = 3;
2114 prefslot_begin = 2; prefslot_end = 3;
2118 prefslot_begin = 0; prefslot_end = 3;
2122 prefslot_begin = 0; prefslot_end = 7;
2127 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2128 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask: bytes within the preferred slot select
// the requested element's bytes; the remainder replicate the mask
// pattern (don't-care positions).
2130 for (int i = 0; i < 16; ++i) {
2131 // zero fill uppper part of preferred slot, don't care about the
2133 unsigned int mask_val;
2135 if (i <= prefslot_end) {
2137 ((i < prefslot_begin)
2139 : elt_byte + (i - prefslot_begin));
2141 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2143 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2146 SDOperand ShufMaskVec =
2147 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2149 sizeof(ShufMask) / sizeof(ShufMask[0]));
2151 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2152 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2153 N, N, ShufMaskVec));
//! Lower ISD::INSERT_VECTOR_ELT for a constant element index.
/*!
  Implements insertion as a SHUFB: the scalar is promoted to a vector
  with SCALAR_TO_VECTOR, and an INSERT_MASK (generated relative to a
  16-byte-aligned address) steers the new element into the destination
  slot while the remaining bytes come from the original vector.
  NOTE(review): the tail of this function (the address arithmetic fed to
  INSERT_MASK and the return) is elided from this view.
 */
2157 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2158 SDOperand VecOp = Op.getOperand(0);
2159 SDOperand ValOp = Op.getOperand(1);
2160 SDOperand IdxOp = Op.getOperand(2);
2161 MVT::ValueType VT = Op.getValueType();
2163 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2164 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2166 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2167 // Use $2 because it's always 16-byte aligned and it's available:
2168 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2171 DAG.getNode(SPUISD::SHUFB, VT,
2172 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2174 DAG.getNode(SPUISD::INSERT_MASK, VT,
2175 DAG.getNode(ISD::ADD, PtrVT,
2177 DAG.getConstant(CN->getValue(),
//! Lower i8 arithmetic by promoting to i16.
/*!
  SPU has no 8-bit ALU operations, so each i8 operator is widened to
  i16 (sign- or zero-extended depending on the operator's semantics),
  performed at 16 bits via \p Opc, and the result truncated back to i8.
  Constants are re-materialized directly at i16 instead of extending.
  NOTE(review): the case labels of the switch are elided from this view,
  so which extension belongs to which ISD opcode cannot be confirmed
  here — only the extension pattern of each visible arm is documented.
 */
2183 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2184 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2186 assert(Op.getValueType() == MVT::i8);
2189 assert(0 && "Unhandled i8 math operator");
2193 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Arm 1: both operands sign-extended to i16.
2195 SDOperand N1 = Op.getOperand(1);
2196 N0 = (N0.getOpcode() != ISD::Constant
2197 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2198 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2199 N1 = (N1.getOpcode() != ISD::Constant
2200 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2201 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2202 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2203 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arm 2: zero-extend and duplicate the value into both bytes of the
// halfword (N0 | (N0 << 8)) before applying Opc — the replication makes
// the 8-bit value wrap correctly within the 16-bit operation.
2207 SDOperand N1 = Op.getOperand(1);
2209 N0 = (N0.getOpcode() != ISD::Constant
2210 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2211 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2212 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2213 N1 = (N1.getOpcode() != ISD::Constant
2214 ? DAG.getNode(N1Opc, MVT::i16, N1)
2215 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2216 SDOperand ExpandArg =
2217 DAG.getNode(ISD::OR, MVT::i16, N0,
2218 DAG.getNode(ISD::SHL, MVT::i16,
2219 N0, DAG.getConstant(8, MVT::i16)));
2220 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2221 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Arm 3: zero-extend both operands (logical/unsigned semantics).
2225 SDOperand N1 = Op.getOperand(1);
2227 N0 = (N0.getOpcode() != ISD::Constant
2228 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2229 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2230 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2231 N1 = (N1.getOpcode() != ISD::Constant
2232 ? DAG.getNode(N1Opc, MVT::i16, N1)
2233 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2234 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2235 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arm 4: sign-extend both operands (arithmetic/signed semantics).
2238 SDOperand N1 = Op.getOperand(1);
2240 N0 = (N0.getOpcode() != ISD::Constant
2241 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2242 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2243 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2244 N1 = (N1.getOpcode() != ISD::Constant
2245 ? DAG.getNode(N1Opc, MVT::i16, N1)
2246 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2247 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2248 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arm 5: identical extension pattern to arm 4.
2251 SDOperand N1 = Op.getOperand(1);
2253 N0 = (N0.getOpcode() != ISD::Constant
2254 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2255 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2256 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2257 N1 = (N1.getOpcode() != ISD::Constant
2258 ? DAG.getNode(N1Opc, MVT::i16, N1)
2259 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2260 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2261 DAG.getNode(Opc, MVT::i16, N0, N1));
2269 //! Lower byte immediate operations for v16i8 vectors:
/*!
  Recognizes (op Arg, splat-constant) in either operand order (looking
  through BIT_CONVERT) and, when the constant BUILD_VECTOR is a uniform
  byte splat, rebuilds it as a BUILD_VECTOR of 16 identical i8 target
  constants so the byte-immediate instruction forms (ANDBI/ORBI/XORBI)
  can be selected.
 */
2271 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2274 MVT::ValueType VT = Op.getValueType();
// First guess: constant in operand 0, argument in operand 1.
2276 ConstVec = Op.getOperand(0);
2277 Arg = Op.getOperand(1);
2278 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2279 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2280 ConstVec = ConstVec.getOperand(0);
// Otherwise swap: constant in operand 1, argument in operand 0.
2282 ConstVec = Op.getOperand(1);
2283 Arg = Op.getOperand(0);
2284 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2285 ConstVec = ConstVec.getOperand(0);
2290 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2291 uint64_t VectorBits[2];
2292 uint64_t UndefBits[2];
2293 uint64_t SplatBits, SplatUndef;
// Only rewrite when the constant vector is a splat of one element.
2296 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2297 && isConstantSplat(VectorBits, UndefBits,
2298 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2299 SplatBits, SplatUndef, SplatSize)) {
2300 SDOperand tcVec[16];
2301 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2302 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2304 // Turn the BUILD_VECTOR into a set of target constants:
2305 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the original operator with the splatted target-constant
// vector as the immediate operand.
2308 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2309 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2316 //! Lower i32 multiplication
/*!
  Same decomposition as the v4i32 case of LowerVectorMUL, at scalar
  width: rA*rB = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA), because the
  SPU multiplier only produces 16x16 partial products.
  The default arm reports an unsupported value type (the abort/brace
  lines are not visible in this view).
 */
2317 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2321 cerr << "CellSPU: Unknown LowerMUL value type, got "
2322 << MVT::getValueTypeString(Op.getValueType())
2328 SDOperand rA = Op.getOperand(0);
2329 SDOperand rB = Op.getOperand(1);
2331 return DAG.getNode(ISD::ADD, MVT::i32,
2332 DAG.getNode(ISD::ADD, MVT::i32,
2333 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2334 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA),
2335 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2342 //! Custom lowering for CTPOP (count population)
2344 Custom lowering code that counts the number ones in the input
2345 operand. SPU has such an instruction, but it counts the number of
2346 ones per byte, which then have to be accumulated.
/*!
  Strategy: promote the scalar into a vector (PROMOTE_SCALAR), apply
  SPUISD::CNTB to get per-byte popcounts, then reduce:
   - i8: the single byte count is the answer (plain extract).
   - i16: fold the two byte counts with one shift/add, mask to 0x0f.
   - i32: fold four byte counts with two shift/add rounds, mask to 0xff.
  Intermediate sums are pinned in virtual registers via CopyToReg so
  each partial result can be read back more than once.
 */
2348 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2349 unsigned VT = Op.getValueType();
2350 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// --- i8: CNTB of the promoted value, extract element 0.
2354 SDOperand N = Op.getOperand(0);
2355 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2357 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2358 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2360 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// --- i16: add the high byte's count (SRL 8) to the low byte's count
// and mask with 0x0f (max count per byte is 8, so 4 bits suffice).
2364 MachineFunction &MF = DAG.getMachineFunction();
2365 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2367 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2369 SDOperand N = Op.getOperand(0);
2370 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2371 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2372 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2374 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2375 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2377 // CNTB_result becomes the chain to which all of the virtual registers
2378 // CNTB_reg, SUM1_reg become associated:
2379 SDOperand CNTB_result =
2380 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2382 SDOperand CNTB_rescopy =
2383 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2385 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2387 return DAG.getNode(ISD::AND, MVT::i16,
2388 DAG.getNode(ISD::ADD, MVT::i16,
2389 DAG.getNode(ISD::SRL, MVT::i16,
// --- i32: two reduction rounds (SRL 16 then SRL 8) followed by a
// 0xff mask (max count for 32 bits fits in a byte).
2396 MachineFunction &MF = DAG.getMachineFunction();
2397 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2399 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2400 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2402 SDOperand N = Op.getOperand(0);
2403 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2404 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2405 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2406 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2408 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2409 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2411 // CNTB_result becomes the chain to which all of the virtual registers
2412 // CNTB_reg, SUM1_reg become associated:
2413 SDOperand CNTB_result =
2414 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2416 SDOperand CNTB_rescopy =
2417 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2420 DAG.getNode(ISD::SRL, MVT::i32,
2421 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2424 DAG.getNode(ISD::ADD, MVT::i32,
2425 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2427 SDOperand Sum1_rescopy =
2428 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2431 DAG.getNode(ISD::SRL, MVT::i32,
2432 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2435 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2436 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2438 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2448 /// LowerOperation - Provide custom lowering hooks for some operations.
/// Dispatches each custom-lowered opcode to its Lower* helper. The
/// default arm prints a diagnostic for unhandled opcodes (the abort
/// line is not visible in this view).
2451 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2453 switch (Op.getOpcode()) {
2455 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2456 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2457 cerr << "*Op.Val:\n";
// Memory and address-forming nodes:
2464 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2466 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2467 case ISD::ConstantPool:
2468 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2469 case ISD::GlobalAddress:
2470 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2471 case ISD::JumpTable:
2472 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2474 return LowerConstant(Op, DAG);
2475 case ISD::ConstantFP:
2476 return LowerConstantFP(Op, DAG);
2478 return LowerBRCOND(Op, DAG);
// Calling convention / control flow:
2479 case ISD::FORMAL_ARGUMENTS:
2480 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2482 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2484 return LowerRET(Op, DAG, getTargetMachine());
// i8 arithmetic (case labels elided in this view):
2493 return LowerI8Math(Op, DAG, Op.getOpcode());
2495 // Vector-related lowering.
2496 case ISD::BUILD_VECTOR:
2497 return LowerBUILD_VECTOR(Op, DAG);
2498 case ISD::SCALAR_TO_VECTOR:
2499 return LowerSCALAR_TO_VECTOR(Op, DAG);
2500 case ISD::VECTOR_SHUFFLE:
2501 return LowerVECTOR_SHUFFLE(Op, DAG);
2502 case ISD::EXTRACT_VECTOR_ELT:
2503 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2504 case ISD::INSERT_VECTOR_ELT:
2505 return LowerINSERT_VECTOR_ELT(Op, DAG);
2507 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2511 return LowerByteImmed(Op, DAG);
2513 // Vector and i8 multiply:
2515 if (MVT::isVector(Op.getValueType()))
2516 return LowerVectorMUL(Op, DAG);
2517 else if (Op.getValueType() == MVT::i8)
2518 return LowerI8Math(Op, DAG, Op.getOpcode());
2520 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
2523 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2524 return LowerFDIVf32(Op, DAG);
2525 // else if (Op.getValueType() == MVT::f64)
2526 // return LowerFDIVf64(Op, DAG);
2528 assert(0 && "Calling FDIV on unsupported MVT");
2531 return LowerCTPOP(Op, DAG);
2537 //===----------------------------------------------------------------------===//
2538 // Target Optimization Hooks
2539 //===----------------------------------------------------------------------===//
/// Target-specific DAG combines. Two patterns are handled here:
///  1. (SPUindirect (SPUaform <addr>, 0), 0) -> (SPUaform <addr>, 0)
///     when not using large-memory mode (the indirection is redundant).
///  2. (add const, (SPUindirect arg, const)) in either operand order ->
///     (SPUindirect arg, const+const), folding the two offsets.
/// Returns the replacement node, or (in the elided fallthrough) an
/// empty SDOperand to keep the node unchanged.
2542 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2545 TargetMachine &TM = getTargetMachine();
2547 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2548 SelectionDAG &DAG = DCI.DAG;
2549 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2551 switch (N->getOpcode()) {
2553 case SPUISD::IndirectAddr: {
2554 if (!ST->usingLargeMem() && N0.getOpcode() == SPUISD::AFormAddr) {
2555 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2556 if (CN->getValue() == 0) {
2557 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2558 // (SPUaform <addr>, 0)
2560 DEBUG(cerr << "Replace: ");
2561 DEBUG(N->dump(&DAG));
2562 DEBUG(cerr << "\nWith: ");
2563 DEBUG(N0.Val->dump(&DAG));
2564 DEBUG(cerr << "\n");
// ISD::ADD combine: fold a constant addend into an IndirectAddr's
// constant offset, whichever operand holds the IndirectAddr.
2571 SDOperand Op0 = N->getOperand(0);
2572 SDOperand Op1 = N->getOperand(1);
2574 if ((Op1.getOpcode() == ISD::Constant
2575 || Op1.getOpcode() == ISD::TargetConstant)
2576 && Op0.getOpcode() == SPUISD::IndirectAddr) {
2577 SDOperand Op01 = Op0.getOperand(1);
2578 if (Op01.getOpcode() == ISD::Constant
2579 || Op01.getOpcode() == ISD::TargetConstant) {
2580 // (add <const>, (SPUindirect <arg>, <const>)) ->
2581 // (SPUindirect <arg>, <const + const>)
2582 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2583 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2584 SDOperand combinedConst =
2585 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2586 Op0.getValueType());
2588 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2589 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2590 DEBUG(cerr << "With: (SPUindirect <arg>, "
2591 << CN0->getValue() + CN1->getValue() << ")\n");
2592 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2593 Op0.getOperand(0), combinedConst);
// Mirror case: the IndirectAddr is in operand 1.
2595 } else if ((Op0.getOpcode() == ISD::Constant
2596 || Op0.getOpcode() == ISD::TargetConstant)
2597 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2598 SDOperand Op11 = Op1.getOperand(1);
2599 if (Op11.getOpcode() == ISD::Constant
2600 || Op11.getOpcode() == ISD::TargetConstant) {
2601 // (add (SPUindirect <arg>, <const>), <const>) ->
2602 // (SPUindirect <arg>, <const + const>)
2603 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2604 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2605 SDOperand combinedConst =
2606 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2607 Op0.getValueType());
2609 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2610 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2611 DEBUG(cerr << "With: (SPUindirect <arg>, "
2612 << CN0->getValue() + CN1->getValue() << ")\n");
2614 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2615 Op1.getOperand(0), combinedConst);
2620 // Otherwise, return unchanged.
2624 //===----------------------------------------------------------------------===//
2625 // Inline Assembly Support
2626 //===----------------------------------------------------------------------===//
2628 /// getConstraintType - Given a constraint letter, return the type of
2629 /// constraint it is for this target.
/// Single-letter register-class constraints (the case labels are elided
/// in this view) map to C_RegisterClass; everything else defers to the
/// TargetLowering base implementation.
2630 SPUTargetLowering::ConstraintType
2631 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2632 if (ConstraintLetter.size() == 1) {
2633 switch (ConstraintLetter[0]) {
2640 return C_RegisterClass;
2643 return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm constraint (plus the operand's value
/// type) to an SPU register class: 64/32-bit integer classes, the f32
/// or f64 FP classes, or the general GPRC class; anything unrecognized
/// falls back to the base TargetLowering handler. The case labels are
/// elided in this view.
2646 std::pair<unsigned, const TargetRegisterClass*>
2647 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2648 MVT::ValueType VT) const
2650 if (Constraint.size() == 1) {
2651 // GCC RS6000 Constraint Letters
2652 switch (Constraint[0]) {
2656 return std::make_pair(0U, SPU::R64CRegisterClass);
2657 return std::make_pair(0U, SPU::R32CRegisterClass);
2660 return std::make_pair(0U, SPU::R32FPRegisterClass);
2661 else if (VT == MVT::f64)
2662 return std::make_pair(0U, SPU::R64FPRegisterClass);
2665 return std::make_pair(0U, SPU::GPRCRegisterClass);
2669 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
/// Compute known-zero/known-one bits for SPU-specific target nodes.
/// NOTE(review): the function body is elided from this view — as shown
/// here it reports no known bits for target nodes; verify against the
/// full file before relying on this.
2673 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2675 uint64_t &KnownZero,
2677 const SelectionDAG &DAG,
2678 unsigned Depth ) const {
2683 // LowerAsmOperandForConstraint
/// Lower an inline-asm operand for the given constraint letter. No
/// SPU-specific handling yet — delegates entirely to the base class.
2685 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2686 char ConstraintLetter,
2687 std::vector<SDOperand> &Ops,
2688 SelectionDAG &DAG) {
2689 // Default, for the time being, to the base class handler
2690 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2693 /// isLegalAddressImmediate - Return true if the integer value can be used
2694 /// as the offset of the target addressing mode.
/// Accepts offsets strictly inside (-2^18, 2^18 - 1), i.e. the signed
/// 18-bit displacement range of SPU d-form addressing (local store is
/// 256K). The Type parameter is unused here.
2695 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2696 // SPU's addresses are 256K:
2697 return (V > -(1 << 18) && V < (1 << 18) - 1);
2700 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {