1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// Lazily-populated SPUISD opcode -> printable-name table; filled on the
// first call to getTargetNodeName(). File-scope mutable state with no
// locking — not thread-safe, but codegen here is single-threaded.
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
// Maps a scalar value type to the byte offset of its "preferred slot"
// within a 16-byte SPU register chunk (used by AlignedLoad below to
// compute rotation amounts).
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
// NOTE(review): the initializer list of valtype_map is elided from this
// listing; entries presumably cover the scalar types handled below — verify
// against the full file.
46 const valtype_map_s valtype_map[] = {
// Element count of the table above (classic sizeof-array idiom).
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Look up VT in the small valtype_map table by linear scan; returns the
// matching entry, or 0 (NULL) when VT has no entry. Callers such as
// AlignedLoad dereference the result without a null check, so a missing
// entry would crash after the diagnostic below is printed.
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// Diagnostic emitted when no entry is found (surrounding guard lines are
// elided from this listing — presumably debug-build only; verify).
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDOperand &Op)
// Pure opcode whitelist: any address-producing node (both the generic and
// the Target* variants) plus the SPU-specific A-form address pseudo node
// counts as a memory target.
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
// Constructor: registers the SPU register classes and declares, per value
// type, how each ISD operation is legalized (Legal / Custom / Expand /
// Promote) with the common TargetLowering machinery.
// NOTE(review): several interior lines are elided from this listing.
105 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
106 : TargetLowering(TM),
109 // Fold away setcc operations if possible.
112 // Use _setjmp/_longjmp instead of setjmp/longjmp.
113 setUseUnderscoreSetJmp(true);
114 setUseUnderscoreLongJmp(true);
116 // Set up the SPU's register classes:
117 // NOTE: i8 register class is not registered because we cannot determine when
118 // we need to zero or sign extend for custom-lowered loads and stores.
119 // NOTE: Ignore the previous note. For now. :-)
120 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
121 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
122 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
123 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
124 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
125 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
126 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
128 // SPU has no sign or zero extended loads for i1, i8, i16:
129 setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
130 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
131 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
132 setStoreXAction(MVT::i1, Custom);
134 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
137 setStoreXAction(MVT::i8, Custom);
139 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
140 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
141 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
143 // SPU constant load actions are custom lowered:
144 setOperationAction(ISD::Constant, MVT::i64, Custom);
145 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
146 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
148 // SPU's loads and stores have to be custom lowered:
// Runs over the scalar types from i1 up to (but, per the '<' comparison,
// excluding) f128 — assumes the MVT enum orders these contiguously.
149 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
151 setOperationAction(ISD::LOAD, sctype, Custom);
152 setOperationAction(ISD::STORE, sctype, Custom);
155 // SPU supports BRCOND, although DAGCombine will convert BRCONDs
156 // into BR_CCs. BR_CC instructions are custom selected in
158 setOperationAction(ISD::BRCOND, MVT::Other, Legal);
160 // Expand the jumptable branches
161 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
162 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
163 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
165 // SPU has no intrinsics for these particular operations:
166 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
167 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
168 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
// NOTE(review): the comment below was copied from the PowerPC backend; it
// is the SPU that lacks SREM/UREM here.
170 // PowerPC has no SREM/UREM instructions
171 setOperationAction(ISD::SREM, MVT::i32, Expand);
172 setOperationAction(ISD::UREM, MVT::i32, Expand);
173 setOperationAction(ISD::SREM, MVT::i64, Expand);
174 setOperationAction(ISD::UREM, MVT::i64, Expand);
176 // We don't support sin/cos/sqrt/fmod
177 setOperationAction(ISD::FSIN , MVT::f64, Expand);
178 setOperationAction(ISD::FCOS , MVT::f64, Expand);
179 setOperationAction(ISD::FREM , MVT::f64, Expand);
180 setOperationAction(ISD::FSIN , MVT::f32, Expand);
181 setOperationAction(ISD::FCOS , MVT::f32, Expand);
182 setOperationAction(ISD::FREM , MVT::f32, Expand);
184 // If we're enabling GP optimizations, use hardware square root
185 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
186 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
188 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
189 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
191 // SPU can do rotate right and left, so legalize it... but customize for i8
192 // because instructions don't exist.
193 setOperationAction(ISD::ROTR, MVT::i32, Legal);
194 setOperationAction(ISD::ROTR, MVT::i16, Legal);
195 setOperationAction(ISD::ROTR, MVT::i8, Custom);
196 setOperationAction(ISD::ROTL, MVT::i32, Legal);
197 setOperationAction(ISD::ROTL, MVT::i16, Legal);
198 setOperationAction(ISD::ROTL, MVT::i8, Custom);
199 // SPU has no native version of shift left/right for i8
200 setOperationAction(ISD::SHL, MVT::i8, Custom);
201 setOperationAction(ISD::SRL, MVT::i8, Custom);
202 setOperationAction(ISD::SRA, MVT::i8, Custom);
204 // Custom lower i32 multiplications
205 setOperationAction(ISD::MUL, MVT::i32, Custom);
207 // Need to custom handle (some) common i8 math ops
208 setOperationAction(ISD::SUB, MVT::i8, Custom);
209 setOperationAction(ISD::MUL, MVT::i8, Custom);
211 // SPU does not have BSWAP. It does have i32 support CTLZ.
212 // CTPOP has to be custom lowered.
213 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
214 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
216 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
217 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
218 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
219 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
221 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
222 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
224 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
226 // SPU does not have select or setcc
227 setOperationAction(ISD::SELECT, MVT::i1, Expand);
228 setOperationAction(ISD::SELECT, MVT::i8, Expand);
229 setOperationAction(ISD::SELECT, MVT::i16, Expand);
230 setOperationAction(ISD::SELECT, MVT::i32, Expand);
231 setOperationAction(ISD::SELECT, MVT::i64, Expand);
232 setOperationAction(ISD::SELECT, MVT::f32, Expand);
233 setOperationAction(ISD::SELECT, MVT::f64, Expand);
235 setOperationAction(ISD::SETCC, MVT::i1, Expand);
236 setOperationAction(ISD::SETCC, MVT::i8, Expand);
237 setOperationAction(ISD::SETCC, MVT::i16, Expand);
238 setOperationAction(ISD::SETCC, MVT::i32, Expand);
239 setOperationAction(ISD::SETCC, MVT::i64, Expand);
240 setOperationAction(ISD::SETCC, MVT::f32, Expand);
241 setOperationAction(ISD::SETCC, MVT::f64, Expand);
243 // SPU has a legal FP -> signed INT instruction
244 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
245 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
246 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
247 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
249 // FDIV on SPU requires custom lowering
250 setOperationAction(ISD::FDIV, MVT::f32, Custom);
251 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
253 // SPU has [U|S]INT_TO_FP
254 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
255 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
256 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
257 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
258 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
259 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
260 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
261 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
263 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
264 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
265 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
266 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
268 // We cannot sextinreg(i1). Expand to shifts.
269 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
271 // Support label based line numbers.
272 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
273 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
275 // We want to legalize GlobalAddress and ConstantPool nodes into the
276 // appropriate instructions to materialize the address.
277 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
278 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
279 setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
280 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
281 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
282 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
283 setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
284 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
286 // RET must be custom lowered, to meet ABI requirements
287 setOperationAction(ISD::RET, MVT::Other, Custom);
289 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
290 setOperationAction(ISD::VASTART , MVT::Other, Custom);
292 // Use the default implementation.
293 setOperationAction(ISD::VAARG , MVT::Other, Expand);
294 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
295 setOperationAction(ISD::VAEND , MVT::Other, Expand);
296 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
297 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
298 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
299 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
// NOTE(review): FP_TO_SINT/i64 and SINT_TO_FP/i64 were already set to
// Custom above — the next two calls are redundant (harmless).
301 // Cell SPU has instructions for converting between i64 and fp.
302 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
303 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
// NOTE(review): this overrides the earlier FP_TO_UINT/i32 'Legal' setting;
// the last call wins, so i32 FP_TO_UINT ends up Promoted.
305 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
306 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
308 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
309 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
311 // First set operation action for all vector types to expand. Then we
312 // will selectively turn on ones that can be effectively codegen'd.
313 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
314 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
315 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
316 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
317 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
318 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
// Blanket per-vector-type actions; specific v16i8 overrides follow the loop.
320 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
321 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
322 // add/sub are legal for all supported vector VT's.
323 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
324 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
325 // mul has to be custom lowered.
326 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
328 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
329 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
330 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
331 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
332 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
333 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
335 // These operations need to be expanded:
336 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
337 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
338 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
339 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
340 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
342 // Custom lower build_vector, constant pool spills, insert and
343 // extract vector elements:
344 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
345 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
346 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
347 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
348 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
349 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8-specific overrides (Custom instead of the Legal set in the loop):
352 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
353 setOperationAction(ISD::AND, MVT::v16i8, Custom);
354 setOperationAction(ISD::OR, MVT::v16i8, Custom);
355 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
356 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
358 setSetCCResultType(MVT::i32);
359 setShiftAmountType(MVT::i32);
360 setSetCCResultContents(ZeroOrOneSetCCResult);
// R1 is the SPU stack pointer.
362 setStackPointerRegisterToSaveRestore(SPU::R1);
364 // We have target-specific dag combine patterns for the following nodes:
365 // e.g., setTargetDAGCombine(ISD::SUB);
367 computeRegisterProperties();
// Return a printable name for the given SPUISD target opcode, or 0 when the
// opcode is unknown. The file-scope node_names table is populated lazily on
// the first call (return-type line is elided from this listing).
371 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
373 if (node_names.empty()) {
374 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
375 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
376 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
377 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
378 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
379 node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
380 node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
381 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
382 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
383 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
384 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
385 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
386 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
387 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
388 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
389 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
390 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
391 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
392 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
393 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
394 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
395 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
396 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
397 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
398 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
399 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
400 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
401 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
402 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
403 "SPUISD::ROTBYTES_RIGHT_Z";
404 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
405 "SPUISD::ROTBYTES_RIGHT_S";
406 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
407 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
408 "SPUISD::ROTBYTES_LEFT_CHAINED";
409 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
410 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
411 node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
412 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
413 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
414 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup: 0 for unknown opcodes (caller is expected to tolerate null).
417 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
419 return ((i != node_names.end()) ? i->second : 0);
422 //===----------------------------------------------------------------------===//
423 // Calling convention code:
424 //===----------------------------------------------------------------------===//
426 #include "SPUGenCallingConv.inc"
428 //===----------------------------------------------------------------------===//
429 // LowerOperation implementation
430 //===----------------------------------------------------------------------===//
432 /// Aligned load common code for CellSPU
434 \param[in] Op The SelectionDAG load or store operand
435 \param[in] DAG The selection DAG
436 \param[in] ST CellSPU subtarget information structure
437 \param[in,out] alignment Caller initializes this to the load or store node's
438 value from getAlignment(), may be updated while generating the aligned load
439 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
440 offset (divisible by 16, modulo 16 == 0)
441 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
442 offset of the preferred slot (modulo 16 != 0)
443 \param[in,out] VT Caller initializes this value type to the the load or store
444 node's loaded or stored value type; may be updated if an i1-extended load or
446 \param[out] was16aligned true if the base pointer had 16-byte alignment,
447 otherwise false. Can help to determine if the chunk needs to be rotated.
449 Both load and store lowering load a block of data aligned on a 16-byte
450 boundary. This is the common aligned load code shared between both.
// NOTE(review): the parameter line declaring 'LSN' (the memory node whose
// base pointer/chain are read below) is elided from this listing — verify
// its type in the full file. The return-type line is elided as well.
453 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
455 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
456 MVT::ValueType &VT, bool &was16aligned)
458 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// NOTE(review): vtm is dereferenced below without a null check; relies on
// getValueTypeMapEntry() having an entry for VT.
459 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
460 SDOperand basePtr = LSN->getBasePtr();
461 SDOperand chain = LSN->getChain();
// base + constant-offset form: peel off the constant to compute the
// 16-byte-aligned chunk offset and the intra-chunk (preferred-slot) offset.
463 if (basePtr.getOpcode() == ISD::ADD) {
464 SDOperand Op1 = basePtr.Val->getOperand(1);
466 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
467 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));
469 alignOffs = (int) CN->getValue();
470 prefSlotOffs = (int) (alignOffs & 0xf);
472 // Adjust the rotation amount to ensure that the final result ends up in
473 // the preferred slot:
474 prefSlotOffs -= vtm->prefslot_byte;
475 basePtr = basePtr.getOperand(0);
477 // Modify alignment, since the ADD is likely from getElementPtr:
478 switch (basePtr.getOpcode()) {
479 case ISD::GlobalAddress:
480 case ISD::TargetGlobalAddress: {
481 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
482 const GlobalValue *GV = GN->getGlobal();
483 alignment = GV->getAlignment();
// (fall-back paths for non-constant / non-ADD base pointers; interior
// lines elided from this listing)
489 prefSlotOffs = -vtm->prefslot_byte;
493 prefSlotOffs = -vtm->prefslot_byte;
496 if (alignment == 16) {
497 // Realign the base pointer as a D-Form address:
498 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
499 if (isMemoryOperand(basePtr)) {
500 SDOperand Zero = DAG.getConstant(0, PtrVT);
501 unsigned Opc = (!ST->usingLargeMem()
503 : SPUISD::XFormAddr);
504 basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
506 basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
507 basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));
510 // Emit the vector load:
// Loads the whole 16-byte chunk as v16i8; caller rotates/extracts.
512 return DAG.getLoad(MVT::v16i8, chain, basePtr,
513 LSN->getSrcValue(), LSN->getSrcValueOffset(),
514 LSN->isVolatile(), 16);
517 // Unaligned load or we're using the "large memory" model, which means that
518 // we have to be very pessimistic:
519 if (isMemoryOperand(basePtr)) {
520 basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
524 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
525 was16aligned = false;
526 return DAG.getLoad(MVT::v16i8, chain, basePtr,
527 LSN->getSrcValue(), LSN->getSrcValueOffset(),
528 LSN->isVolatile(), 16);
531 /// Custom lower loads for CellSPU
533 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
534 within a 16-byte block, we have to rotate to extract the requested element.
// (return-type line elided from this listing)
537 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
538 LoadSDNode *LN = cast<LoadSDNode>(Op);
539 SDOperand the_chain = LN->getChain();
540 MVT::ValueType VT = LN->getLoadedVT();
541 MVT::ValueType OpVT = Op.Val->getValueType(0);
542 ISD::LoadExtType ExtType = LN->getExtensionType();
543 unsigned alignment = LN->getAlignment();
546 // For an extending load of an i1 variable, just call it i8 (or whatever we
547 // were passed) and make it zero-extended:
550 ExtType = ISD::ZEXTLOAD;
553 switch (LN->getAddressingMode()) {
554 case ISD::UNINDEXED: {
// Load the containing 16-byte chunk; 'result' holds the v16i8 load
// (its declaration line is elided from this listing).
558 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
563 the_chain = result.getValue(1);
564 // Rotate the chunk if necessary
// When the desired scalar is not already in the preferred slot (or the
// base pointer was not known 16-aligned), rotate the chunk left so it is.
567 if (rotamt != 0 || !was16aligned) {
568 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// Known-aligned case: rotate by the constant byte amount...
573 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
575 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
576 LoadSDNode *LN1 = cast<LoadSDNode>(result);
// ...otherwise rotate by (basePtr + rotamt) computed at runtime.
579 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
580 DAG.getConstant(rotamt, PtrVT));
583 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
584 the_chain = result.getValue(1);
587 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
589 MVT::ValueType vecVT = MVT::v16i8;
591 // Convert the loaded v16i8 vector to the appropriate vector type
592 // specified by the operand:
595 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
597 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
// Extract element 0 (the preferred slot) of the retyped vector, keeping
// the chain threaded through via the _CHAINED node.
600 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
601 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
602 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
603 the_chain = result.getValue(1);
605 // Handle the sign and zero-extending loads for i1 and i8:
608 if (ExtType == ISD::SEXTLOAD) {
609 NewOpC = (OpVT == MVT::i1
610 ? SPUISD::EXTRACT_I1_SEXT
611 : SPUISD::EXTRACT_I8_SEXT);
613 assert(ExtType == ISD::ZEXTLOAD);
614 NewOpC = (OpVT == MVT::i1
615 ? SPUISD::EXTRACT_I1_ZEXT
616 : SPUISD::EXTRACT_I8_ZEXT);
619 result = DAG.getNode(NewOpC, OpVT, result);
// Package scalar + chain as an LDRESULT so both values reach the caller.
622 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
623 SDOperand retops[2] = { result, the_chain };
625 result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);
// Indexed addressing modes are not supported — diagnose and fall through
// (surrounding case labels are elided from this listing).
632 case ISD::LAST_INDEXED_MODE:
633 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
635 cerr << (unsigned) LN->getAddressingMode() << "\n";
643 /// Custom lower stores for CellSPU
645 All CellSPU stores are aligned to 16-byte boundaries, so for elements
646 within a 16-byte block, we have to generate a shuffle to insert the
647 requested element into its place, then store the resulting block.
// (return-type line elided from this listing)
650 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
651 StoreSDNode *SN = cast<StoreSDNode>(Op);
652 SDOperand Value = SN->getValue();
653 MVT::ValueType VT = Value.getValueType();
// Truncating stores use the (narrower) stored type; plain stores use VT.
654 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
655 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
656 unsigned alignment = SN->getAlignment();
658 switch (SN->getAddressingMode()) {
659 case ISD::UNINDEXED: {
660 int chunk_offset, slot_offset;
663 // The vector type we really want to load from the 16-byte chunk, except
664 // in the case of MVT::i1, which has to be v16i8.
665 unsigned vecVT, stVecVT = MVT::v16i8;
668 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
669 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// Read-modify-write: load the containing 16-byte chunk first.
671 SDOperand alignLoadVec =
672 AlignedLoad(Op, DAG, ST, SN, alignment,
673 chunk_offset, slot_offset, VT, was16aligned);
675 if (alignLoadVec.Val == 0)
678 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
679 SDOperand basePtr = LN->getBasePtr();
680 SDOperand the_chain = alignLoadVec.getValue(1);
681 SDOperand theValue = SN->getValue();
// (guard condition's first clause elided from this listing)
685 && (theValue.getOpcode() == ISD::AssertZext
686 || theValue.getOpcode() == ISD::AssertSext)) {
687 // Drill down and get the value for zero- and sign-extended
689 theValue = theValue.getOperand(0);
694 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
695 SDOperand insertEltPtr;
696 SDOperand insertEltOp;
698 // If the base pointer is already a D-form address, then just create
699 // a new D-form address with a slot offset and the original base pointer.
700 // Otherwise generate a D-form address with the slot offset relative
701 // to the stack pointer, which is always aligned.
702 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
703 DEBUG(basePtr.Val->dump(&DAG));
706 if (basePtr.getOpcode() == SPUISD::DFormAddr) {
707 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
708 basePtr.getOperand(0),
710 } else if (basePtr.getOpcode() == SPUISD::XFormAddr ||
711 (basePtr.getOpcode() == ISD::ADD
712 && basePtr.getOperand(0).getOpcode() == SPUISD::XFormAddr)) {
713 insertEltPtr = basePtr;
715 insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
716 DAG.getRegister(SPU::R1, PtrVT),
// Build an insertion mask and shuffle the scalar into its slot within
// the loaded chunk, then store the whole chunk back.
720 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
721 result = DAG.getNode(SPUISD::SHUFB, vecVT,
722 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
724 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
726 result = DAG.getStore(the_chain, result, basePtr,
727 LN->getSrcValue(), LN->getSrcValueOffset(),
728 LN->isVolatile(), LN->getAlignment());
// BUG(review): the diagnostic below says "LowerLOAD"/"LoadSDNode", but this
// is LowerSTORE handling a StoreSDNode — copy/paste error in the message.
737 case ISD::LAST_INDEXED_MODE:
738 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
740 cerr << (unsigned) SN->getAddressingMode() << "\n";
748 /// Generate the address of a constant pool entry.
// Static relocation model only. Small-memory model: the hi/lo pair is
// built with SPUISD::Hi/Lo; large-memory model: a single X-form address.
// (return-type line and the non-static assert's 'assert(' line are elided
// from this listing.)
750 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
751 MVT::ValueType PtrVT = Op.getValueType();
752 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
753 Constant *C = CP->getConstVal();
754 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
755 SDOperand Zero = DAG.getConstant(0, PtrVT);
756 const TargetMachine &TM = DAG.getTarget();
758 if (TM.getRelocationModel() == Reloc::Static) {
759 if (!ST->usingLargeMem()) {
760 // Just return the SDOperand with the constant pool address in it.
764 // Generate hi/lo address pair
765 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
766 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
768 return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);
770 return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);
776 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry. Static relocation model
// only; large-memory model routes through an X-form address node.
// (return-type line and the assert's 'assert(' line are elided from this
// listing.)
781 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
782 MVT::ValueType PtrVT = Op.getValueType();
783 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
784 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
785 SDOperand Zero = DAG.getConstant(0, PtrVT);
786 const TargetMachine &TM = DAG.getTarget();
788 if (TM.getRelocationModel() == Reloc::Static) {
789 return (!ST->usingLargeMem()
791 : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));
795 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global variable. Static relocation model
// only; large-memory model routes through an X-form address node.
// (return-type line and the small-memory return line are elided from this
// listing.)
800 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
801 MVT::ValueType PtrVT = Op.getValueType();
802 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
803 GlobalValue *GV = GSDN->getGlobal();
804 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
805 const TargetMachine &TM = DAG.getTarget();
806 SDOperand Zero = DAG.getConstant(0, PtrVT);
808 if (TM.getRelocationModel() == Reloc::Static) {
809 return (!ST->usingLargeMem()
811 : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));
// Unsupported relocation model: report and (per the elided lines)
// presumably abort.
813 cerr << "LowerGlobalAddress: Relocation model other than static not "
822 //! Custom lower i64 integer constants
824 This code inserts all of the necessary juggling that needs to occur to load
825 a 64-bit constant into a register.
// Strategy: splat the constant into a v2i64 BUILD_VECTOR and extract
// element 0. Only i64 is handled; other types hit the diagnostic below.
// (return-type line elided from this listing.)
828 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
829 unsigned VT = Op.getValueType();
830 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
832 if (VT == MVT::i64) {
833 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
834 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
835 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
838 cerr << "LowerConstant: unhandled constant type "
839 << MVT::getValueTypeString(VT)
848 //! Custom lower single precision floating point constants
850 "float" immediates can be lowered as if they were unsigned 32-bit integers.
851 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
// (return-type line elided from this listing.)
855 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
856 unsigned VT = Op.getValueType();
857 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
// (the 'assert(' line this message belongs to is elided from this listing)
860 "LowerConstantFP: Node is not ConstantFPSDNode");
862 if (VT == MVT::f32) {
// f32: emit directly via the SFPConstant pseudo.
863 float targetConst = FP->getValueAPF().convertToFloat();
864 return DAG.getNode(SPUISD::SFPConstant, VT,
865 DAG.getTargetConstantFP(targetConst, VT));
866 } else if (VT == MVT::f64) {
// f64: reuse the i64 constant lowering on the raw bit pattern, then
// bit-convert the result back to f64.
867 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
868 return DAG.getNode(ISD::BIT_CONVERT, VT,
869 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
// Lower ISD::FORMAL_ARGUMENTS: incoming arguments are copied out of the SPU
// argument registers until those run out; later arguments live in fixed
// stack slots of StackSlotSize bytes starting at minStackSize().  Varargs
// remainders are spilled so llvm.va_start can walk them.
// NOTE(review): this extract elides several original lines (the per-type
// switch, ArgRegIdx increments, closing braces); only visible statements
// are documented.
876 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
878 MachineFunction &MF = DAG.getMachineFunction();
879 MachineFrameInfo *MFI = MF.getFrameInfo();
880 MachineRegisterInfo &RegInfo = MF.getRegInfo();
881 SmallVector<SDOperand, 8> ArgValues;
// Operand 0 is the chain; operand 2 encodes the vararg flag as a constant.
882 SDOperand Root = Op.getOperand(0);
883 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
885 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
886 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// First stack-passed argument sits just above the minimal frame area.
888 unsigned ArgOffset = SPUFrameInfo::minStackSize();
889 unsigned ArgRegIdx = 0;
890 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
892 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
894 // Add DAG nodes to load the arguments or copy them out of registers.
// The node's last result is the output chain, hence getNumValues()-1.
895 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
897 bool needsLoad = false;
898 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
899 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
// Unhandled types are diagnosed (default case of the elided type switch).
903 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
904 << MVT::getValueTypeString(ObjectVT)
// i8 arguments arrive in an R8C-class register while registers remain.
909 if (!isVarArg && ArgRegIdx < NumArgRegs) {
910 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
911 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
912 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 arguments: R16C register class.
919 if (!isVarArg && ArgRegIdx < NumArgRegs) {
920 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
921 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
922 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 arguments: R32C register class.
929 if (!isVarArg && ArgRegIdx < NumArgRegs) {
930 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
931 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
932 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 arguments: R64C register class.
939 if (!isVarArg && ArgRegIdx < NumArgRegs) {
940 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
941 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
942 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 arguments: R32FP register class.
949 if (!isVarArg && ArgRegIdx < NumArgRegs) {
950 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
951 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
952 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 arguments: R64FP register class.
959 if (!isVarArg && ArgRegIdx < NumArgRegs) {
960 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
961 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
962 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments keep their vector type (VECREG class).
973 if (!isVarArg && ArgRegIdx < NumArgRegs) {
974 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
975 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
976 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
984 // We need to load the argument to a virtual register if we determined above
985 // that we ran out of physical registers of the appropriate type
987 // If the argument is actually used, emit a load from the right stack
989 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
990 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
991 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
992 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
994 // Don't emit a dead load.
995 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
998 ArgOffset += StackSlotSize;
1001 ArgValues.push_back(ArgVal);
1004 // If the function takes variable number of arguments, make a frame index for
1005 // the start of the first vararg value... for expansion of llvm.va_start.
1007 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1009 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1010 // If this function is vararg, store any remaining integer argument regs to
1011 // their spots on the stack so that they may be loaded by dereferencing the
1012 // result of va_next.
1013 SmallVector<SDOperand, 8> MemOps;
1014 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1015 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1016 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1017 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1018 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1019 MemOps.push_back(Store);
1020 // Increment the address by four for the next argument to store
1021 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1022 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Fold all vararg spill stores into a single token factor on the chain.
1024 if (!MemOps.empty())
1025 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1028 ArgValues.push_back(Root);
1030 // Return the new list of results.
1031 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1032 Op.Val->value_end());
1033 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1036 /// isLSAAddress - Return the immediate to use if the specified
1037 /// value is representable as a LSA address.
1038 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1039 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1042 int Addr = C->getValue();
1043 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1044 (Addr << 14 >> 14) != Addr)
1045 return 0; // Top 14 bits have to be sext of immediate.
1047 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL: marshal arguments into SPU argument registers / stack
// slots, select the call addressing form (PC-relative, A-form, or X-form
// indirect), emit the call, and copy any return values out of R3/R4.
// NOTE(review): this extract elides original lines (case labels, else
// branches, closing braces); documentation covers visible statements only.
1052 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1053 SDOperand Chain = Op.getOperand(0);
1055 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
// NOTE(review): isTailCall is computed here but never consulted in the
// visible code — tail calls appear to be lowered as ordinary calls.
1056 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1058 SDOperand Callee = Op.getOperand(4);
// Operands after the callee come in (value, signness-flag) pairs.
1059 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1060 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1061 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1062 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1064 // Handy pointer type
1065 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Accumulate how many bytes are to be pushed on the stack, including the
// linkage area, and parameter passing area. According to the SPU ABI,
// we minimally need space for [LR] and [SP]
1070 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
// Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument
1075 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1077 // Figure out which arguments are going to go in registers, and which in
1079 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1080 unsigned ArgRegIdx = 0;
// Keep track of registers passing arguments
1083 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
// And the arguments passed on the stack
1085 SmallVector<SDOperand, 8> MemOpChains;
1087 for (unsigned i = 0; i != NumOps; ++i) {
1088 SDOperand Arg = Op.getOperand(5+2*i);
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
1092 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1093 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1095 switch (Arg.getValueType()) {
1096 default: assert(0 && "Unexpected ValueType for argument!");
// Each type case: use the next argument register if any remain,
// otherwise store to the computed stack slot and advance ArgOffset.
1100 if (ArgRegIdx != NumArgRegs) {
1101 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1103 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1104 ArgOffset += StackSlotSize;
1109 if (ArgRegIdx != NumArgRegs) {
1110 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1112 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1113 ArgOffset += StackSlotSize;
1120 if (ArgRegIdx != NumArgRegs) {
1121 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1123 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1124 ArgOffset += StackSlotSize;
// Update number of stack bytes actually used, insert a call sequence start
1131 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1132 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1134 if (!MemOpChains.empty()) {
// Adjust the stack pointer for the stack arguments.
1136 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1137 &MemOpChains[0], MemOpChains.size());
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
1143 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1144 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1146 InFlag = Chain.getValue(1);
1149 std::vector<MVT::ValueType> NodeTys;
1150 NodeTys.push_back(MVT::Other); // Returns a chain
1151 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1153 SmallVector<SDOperand, 8> Ops;
1154 unsigned CallOpc = SPUISD::CALL;
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
1159 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1160 GlobalValue *GV = G->getGlobal();
1161 unsigned CalleeVT = Callee.getValueType();
1162 SDOperand Zero = DAG.getConstant(0, PtrVT);
1163 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1165 if (!ST->usingLargeMem()) {
// Turn calls to targets that are defined (i.e., have bodies) into BRSL
// style calls, otherwise, external symbols are BRASL calls. This assumes
// that declared/defined symbols are in the same compilation unit and can
// be reached through PC-relative jumps.
1172 // This may be an unsafe assumption for JIT and really large compilation
1174 if (GV->isDeclaration()) {
1175 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1177 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1180 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1182 Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
1184 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1185 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1186 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
// If this is an absolute destination address that appears to be a legal
// local store address, use the munged value.
1189 Callee = SDOperand(Dest, 0);
1192 Ops.push_back(Chain);
1193 Ops.push_back(Callee);
1195 // Add argument registers to the end of the list so that they are known live
1197 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1198 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1199 RegsToPass[i].second.getValueType()));
1202 Ops.push_back(InFlag);
1203 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1204 InFlag = Chain.getValue(1);
1206 SDOperand ResultVals[3];
1207 unsigned NumResults = 0;
// If the call has results, copy the values out of the ret val registers.
1211 switch (Op.Val->getValueType(0)) {
1212 default: assert(0 && "Unexpected ret value!");
1213 case MVT::Other: break;
// A pair of i32 results comes back in R4:R3; a single i32 in R3.
1215 if (Op.Val->getValueType(1) == MVT::i32) {
1216 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1217 ResultVals[0] = Chain.getValue(0);
1218 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1219 Chain.getValue(2)).getValue(1);
1220 ResultVals[1] = Chain.getValue(0);
1222 NodeTys.push_back(MVT::i32);
1224 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1225 ResultVals[0] = Chain.getValue(0);
1228 NodeTys.push_back(MVT::i32);
// i64 results come back in R3.
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1232 ResultVals[0] = Chain.getValue(0);
1234 NodeTys.push_back(MVT::i64);
// Floating-point and vector results also come back in R3.
1238 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1239 InFlag).getValue(1);
1240 ResultVals[0] = Chain.getValue(0);
1242 NodeTys.push_back(Op.Val->getValueType(0));
1249 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1250 InFlag).getValue(1);
1251 ResultVals[0] = Chain.getValue(0);
1253 NodeTys.push_back(Op.Val->getValueType(0));
// Close the call sequence; the callee pops nothing on SPU.
1257 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1258 DAG.getConstant(NumStackBytes, PtrVT));
1259 NodeTys.push_back(MVT::Other);
1261 // If the function returns void, just return the chain.
1262 if (NumResults == 0)
1265 // Otherwise, merge everything together with a MERGE_VALUES node.
1266 ResultVals[NumResults++] = Chain;
1267 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1268 ResultVals, NumResults);
1269 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the return-value calling convention (RetCC_SPU),
// mark the result registers live-out, copy each returned value into its
// assigned register, and emit an SPUISD::RET_FLAG terminator.
// NOTE(review): the declaration of Flag and the branch selecting between
// the two RET_FLAG forms are elided in this extract.
1273 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1274 SmallVector<CCValAssign, 16> RVLocs;
1275 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1276 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1277 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1278 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
// If this is the first return lowered for this function, add the regs to the
// liveout set for the function.
1282 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1283 for (unsigned i = 0; i != RVLocs.size(); ++i)
1284 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1287 SDOperand Chain = Op.getOperand(0);
1290 // Copy the result values into the output registers.
1291 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1292 CCValAssign &VA = RVLocs[i];
1293 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands come in (value, signness) pairs, hence i*2+1.
1294 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1295 Flag = Chain.getValue(1);
// With values returned, glue the flag into the return node ...
1299 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
// ... otherwise return with the bare chain.
1301 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1305 //===----------------------------------------------------------------------===//
1306 // Vector related lowering:
1307 //===----------------------------------------------------------------------===//
1309 static ConstantSDNode *
1310 getVecImm(SDNode *N) {
1311 SDOperand OpVal(0, 0);
1313 // Check to see if this buildvec has a single non-undef value in its elements.
1314 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1315 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1317 OpVal = N->getOperand(i);
1318 else if (OpVal != N->getOperand(i))
1322 if (OpVal.Val != 0) {
1323 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1328 return 0; // All UNDEF: use implicit def.; not Constant node
1331 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1332 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1334 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1335 MVT::ValueType ValueType) {
1336 if (ConstantSDNode *CN = getVecImm(N)) {
1337 uint64_t Value = CN->getValue();
1338 if (Value <= 0x3ffff)
1339 return DAG.getConstant(Value, ValueType);
1345 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1346 /// and the value fits into a signed 16-bit constant, and if so, return the
1348 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1349 MVT::ValueType ValueType) {
1350 if (ConstantSDNode *CN = getVecImm(N)) {
1351 if (ValueType == MVT::i32) {
1352 int Value = (int) CN->getValue();
1353 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1355 if (Value == SExtValue)
1356 return DAG.getConstant(Value, ValueType);
1357 } else if (ValueType == MVT::i16) {
1358 short Value = (short) CN->getValue();
1359 int SExtValue = ((int) Value << 16) >> 16;
1361 if (Value == (short) SExtValue)
1362 return DAG.getConstant(Value, ValueType);
1363 } else if (ValueType == MVT::i64) {
1364 int64_t Value = CN->getValue();
1365 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1367 if (Value == SExtValue)
1368 return DAG.getConstant(Value, ValueType);
1375 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1376 /// and the value fits into a signed 10-bit constant, and if so, return the
1378 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1379 MVT::ValueType ValueType) {
1380 if (ConstantSDNode *CN = getVecImm(N)) {
1381 int Value = (int) CN->getValue();
1382 if ((ValueType == MVT::i32 && isS10Constant(Value))
1383 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1384 return DAG.getConstant(Value, ValueType);
1390 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1391 /// and the value fits into a signed 8-bit constant, and if so, return the
1394 /// @note: The incoming vector is v16i8 because that's the only way we can load
1395 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1397 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1398 MVT::ValueType ValueType) {
1399 if (ConstantSDNode *CN = getVecImm(N)) {
1400 int Value = (int) CN->getValue();
1401 if (ValueType == MVT::i16
1402 && Value <= 0xffff /* truncated from uint64_t */
1403 && ((short) Value >> 8) == ((short) Value & 0xff))
1404 return DAG.getConstant(Value & 0xff, ValueType);
1405 else if (ValueType == MVT::i8
1406 && (Value & 0xff) == Value)
1407 return DAG.getConstant(Value, ValueType);
1413 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1414 /// and the value fits into a signed 16-bit constant, and if so, return the
1416 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1417 MVT::ValueType ValueType) {
1418 if (ConstantSDNode *CN = getVecImm(N)) {
1419 uint64_t Value = CN->getValue();
1420 if ((ValueType == MVT::i32
1421 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1422 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1423 return DAG.getConstant(Value >> 16, ValueType);
1429 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1430 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1431 if (ConstantSDNode *CN = getVecImm(N)) {
1432 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1438 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1439 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1440 if (ConstantSDNode *CN = getVecImm(N)) {
1441 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
// NOTE(review): the "nonconstant element" early-return and the final
// return are elided in this extract.
1452 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1453 uint64_t UndefBits[2]) {
1454 // Start with zero'd results.
1455 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
// All elements of a BUILD_VECTOR share one type; measure it once.
1457 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1458 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1459 SDOperand OpVal = BV->getOperand(i);
1461 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1462 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1464 uint64_t EltBits = 0;
// Undef elements contribute a run of ones to UndefBits, zeros to VectorBits.
1465 if (OpVal.getOpcode() == ISD::UNDEF) {
1466 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1467 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
// Integer constants: mask to the element width.
1469 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1470 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
// FP constants: reinterpret the bit pattern at the right width.
1471 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1472 const APFloat &apf = CN->getValueAPF();
1473 EltBits = (CN->getValueType(0) == MVT::f32
1474 ? FloatToBits(apf.convertToFloat())
1475 : DoubleToBits(apf.convertToDouble()));
1477 // Nonconstant element.
1481 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1484 //printf("%llx %llx %llx %llx\n",
1485 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
// NOTE(review): this extract elides the SplatBits/SplatSize assignments for
// the 16/32/64-bit cases and the success returns; the visible code shows
// the narrowing checks only.
1493 static bool isConstantSplat(const uint64_t Bits128[2],
1494 const uint64_t Undef128[2],
1496 uint64_t &SplatBits, uint64_t &SplatUndef,
1498 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1499 // the same as the lower 64-bits, ignoring undefs.
// Progressively fold the 128-bit value down: OR the defined bits together,
// AND the undef masks (a slot is undef only if undef in both halves).
1500 uint64_t Bits64 = Bits128[0] | Bits128[1];
1501 uint64_t Undef64 = Undef128[0] & Undef128[1];
1502 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1503 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1504 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1505 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Two 64-bit halves match (modulo undefs): at worst a 64-bit splat.
1507 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1508 if (MinSplatBits < 64) {
1510 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1512 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1513 if (MinSplatBits < 32) {
1515 // If the top 16-bits are different than the lower 16-bits, ignoring
1516 // undefs, we have an i32 splat.
1517 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1518 if (MinSplatBits < 16) {
1519 // If the top 8-bits are different than the lower 8-bits, ignoring
1520 // undefs, we have an i16 splat.
1521 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1522 // Otherwise, we have an 8-bit splat.
1523 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1524 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1530 SplatUndef = Undef16;
1537 SplatUndef = Undef32;
1543 SplatBits = Bits128[0];
1544 SplatUndef = Undef128[0];
1550 return false; // Can't be a splat if two pieces don't match.
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
// NOTE(review): this extract elides the value-type switch skeleton and
// several else branches; each region below corresponds to one case.
1558 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1559 MVT::ValueType VT = Op.getValueType();
1560 // If this is a vector of constants or undefs, get the bits. A bit in
1561 // UndefBits is set if the corresponding element of the vector is an
1562 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1564 uint64_t VectorBits[2];
1565 uint64_t UndefBits[2];
1566 uint64_t SplatBits, SplatUndef;
1568 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1569 || !isConstantSplat(VectorBits, UndefBits,
1570 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1571 SplatBits, SplatUndef, SplatSize))
1572 return SDOperand(); // Not a constant vector, not a splat.
// v4f32: splat as four identical i32 constants, then bitcast back.
1577 uint32_t Value32 = SplatBits;
1578 assert(SplatSize == 4
1579 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1580 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1581 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1582 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1583 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same trick with two i64 constants.
1587 uint64_t f64val = SplatBits;
1588 assert(SplatSize == 8
1589 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1590 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1591 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1592 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1593 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8 case:
1597 // 8-bit constants have to be expanded to 16-bits
1598 unsigned short Value16 = SplatBits | (SplatBits << 8);
1600 for (int i = 0; i < 8; ++i)
1601 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1602 return DAG.getNode(ISD::BIT_CONVERT, VT,
1603 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 case: replicate either the 16-bit splat or a doubled 8-bit splat.
1606 unsigned short Value16;
1608 Value16 = (unsigned short) (SplatBits & 0xffff);
1610 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1611 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1613 for (int i = 0; i < 8; ++i) Ops[i] = T;
1614 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32 case: four copies of the 32-bit splat.
1617 unsigned int Value = SplatBits;
1618 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1619 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 case: build the splat from its two 32-bit halves via SHUFB.
1622 uint64_t val = SplatBits;
1623 uint32_t upper = uint32_t(val >> 32);
1624 uint32_t lower = uint32_t(val);
1629 SmallVector<SDOperand, 16> ShufBytes;
1631 bool upper_special, lower_special;
1633 // NOTE: This code creates common-case shuffle masks that can be easily
1634 // detected as common expressions. It is not attempting to create highly
1635 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1637 // Detect if the upper or lower half is a special shuffle mask pattern:
1638 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1639 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1641 // Create lower vector if not a special pattern
1642 if (!lower_special) {
1643 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1644 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1645 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1646 LO32C, LO32C, LO32C, LO32C));
1649 // Create upper vector if not a special pattern
1650 if (!upper_special) {
1651 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1652 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1653 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1654 HI32C, HI32C, HI32C, HI32C));
1657 // If either upper or lower are special, then the two input operands are
1658 // the same (basically, one of them is a "don't care")
1663 if (lower_special && upper_special) {
1664 // Unhappy situation... both upper and lower are special, so punt with
1665 // a target constant:
1666 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1667 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shuffle control word, one byte per (word i, byte j).
1671 for (int i = 0; i < 4; ++i) {
1672 for (int j = 0; j < 4; ++j) {
1674 bool process_upper, process_lower;
// Even words select from the upper half, odd words from the lower half.
1677 process_upper = (upper_special && (i & 1) == 0);
1678 process_lower = (lower_special && (i & 1) == 1);
1680 if (process_upper || process_lower) {
// Special patterns map to shufb's magic control bytes (0x80 = zero,
// 0xc0 = 0xff, 0xe0 = 0x80 in the result's first byte).
1681 if ((process_upper && upper == 0)
1682 || (process_lower && lower == 0))
1684 else if ((process_upper && upper == 0xffffffff)
1685 || (process_lower && lower == 0xffffffff))
1687 else if ((process_upper && upper == 0x80000000)
1688 || (process_lower && lower == 0x80000000))
1689 val = (j == 0 ? 0xe0 : 0x80);
1691 val = i * 4 + j + ((i & 1) * 16);
1693 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1697 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1698 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1699 &ShufBytes[0], ShufBytes.size()));
1701 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1702 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1703 return DAG.getNode(ISD::BIT_CONVERT, VT,
1704 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1705 Zero, Zero, Zero, Zero));
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
///
/// INSERT_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): this extract elides the V2EltIdx0 assignments, the loop
// increment clause, and several closing braces.
1726 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1727 SDOperand V1 = Op.getOperand(0);
1728 SDOperand V2 = Op.getOperand(1);
1729 SDOperand PermMask = Op.getOperand(2);
// A shuffle with an undef second operand is a unary shuffle of V1.
1731 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
// to be monotonically increasing with one exception element.
1736 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1737 unsigned EltsFromV2 = 0;
1739 unsigned V2EltIdx0 = 0;
1740 unsigned CurrElt = 0;
1741 bool monotonic = true;
// The first index referring into V2 depends on V1's element count (elided).
1742 if (EltVT == MVT::i8)
1744 else if (EltVT == MVT::i16)
1746 else if (EltVT == MVT::i32)
1749 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; bail as soon as it is non-monotonic or pulls more than
// one element from V2.
1751 for (unsigned i = 0, e = PermMask.getNumOperands();
1752 EltsFromV2 <= 1 && monotonic && i != e;
1755 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1758 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1760 if (SrcElt >= V2EltIdx0) {
// Record the byte offset of the single V2 element.
1762 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1763 } else if (CurrElt != SrcElt) {
// Single-insertion case: build an INSERT_MASK control word, then SHUFB.
1770 if (EltsFromV2 == 1 && monotonic) {
1771 // Compute mask and shuffle
1772 MachineFunction &MF = DAG.getMachineFunction();
1773 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1774 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1775 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1776 // Initialize temporary register to 0
1777 SDOperand InitTempReg =
1778 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1779 // Copy register's contents as index in INSERT_MASK:
1780 SDOperand ShufMaskOp =
1781 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1782 DAG.getTargetConstant(V2Elt, MVT::i32),
1783 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1784 // Use shuffle mask in SHUFB synthetic instruction:
1785 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1787 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1788 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1790 SmallVector<SDOperand, 16> ResultMask;
1791 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1793 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1796 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1798 for (unsigned j = 0; j != BytesPerElement; ++j) {
1799 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1804 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1805 &ResultMask[0], ResultMask.size());
1806 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes an explicit
// BUILD_VECTOR of replicated constants (which later folds to a vector
// load); any other scalar is promoted into a vector register via the
// SPUISD::PROMOTE_SCALAR synthetic node.
// NOTE(review): declarations of n_copies/VT and some braces are elided
// in this extract.
1810 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1811 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1813 if (Op0.Val->getOpcode() == ISD::Constant) {
// For a constant, build the appropriate constant vector, which will
// eventually simplify to a vector register load.
1817 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1818 SmallVector<SDOperand, 16> ConstVecValues;
1822 // Create a constant vector:
// Map the vector type to (element count, element type).
1823 switch (Op.getValueType()) {
1824 default: assert(0 && "Unexpected constant value type in "
1825 "LowerSCALAR_TO_VECTOR");
1826 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1827 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1828 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1829 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1830 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1831 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1834 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1835 for (size_t j = 0; j < n_copies; ++j)
1836 ConstVecValues.push_back(CValue);
1838 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1839 &ConstVecValues[0], ConstVecValues.size());
1841 // Otherwise, copy the value from one register to another:
1842 switch (Op0.getValueType()) {
1843 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1850 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Lower ISD::MUL on vector types, which the SPU has no single instruction for.
/*!
  Each element width is expanded from the SPU's 16-bit multiply primitives
  (MPY / MPYU / MPYH / MPYHH).  NOTE(review): several `case` labels and
  closing braces are not visible in this chunk; the banner comments below
  mark which element width each fragment implements.
 */
static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getValueType()) {
  // --- v4i32 fragment: 32x32 product from 16-bit partials ---
  // full product = MPYU(rA,rB) (low halfwords, unsigned)
  //              + MPYH(rA,rB) + MPYH(rB,rA) (cross/high partial products)
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);

  // --- v8i16 fragment ---
  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the FSMBI to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): Chain and rA both read getOperand(0) — for a MUL node
    // operand 0 is a value, not a chain; confirm this aliasing is intended.
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    // Pin the FSMBI mask and MPYHH result in virtual registers so the
    // scheduler can hoist them for dual-issue.
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // FSMBI 0xcccc expands to a byte-select mask (used by SELB below).
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i32)));

      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDOperand HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    // SELB merges the shifted high-half products with the low-half products.
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));

  // --- v16i8 fragment ---
  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    SDOperand Chain = Op.getOperand(0);
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);
    SDOperand c8 = DAG.getConstant(8, MVT::i8);
    SDOperand c16 = DAG.getConstant(16, MVT::i8);

    unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // Low-byte x low-byte products, computed as halfword multiplies:
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    // Arithmetic-shift right by 8 sign-extends the high byte of each halfword.
    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    // FSMBI 0x2222 -> byte-select mask for SELB merging below.
    SDOperand FSMBdef_2222 =
      DAG.getCopyToReg(Chain, FSMBreg_2222,
                       DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i32)));

    SDOperand FSMBuse_2222 =
      DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);

    SDOperand LoProd_1 =
      DAG.getCopyToReg(Chain, LoProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,

    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    // Keep only the low 16 bits of each 32-bit lane of the low products:
      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    // Sign-extended upper halves of rA / rB:
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDOperand HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));

      DAG.getCopyToReg(Chain, HiProd_reg,
                       DAG.getNode(SPUISD::SELB, MVT::v8i16,

                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),

      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);

    // Recombine low and high products into the final v16i8 result:
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,

  // default: unsupported vector element type for multiply.
    cerr << "CellSPU: Unknown vector multiplication, got "
         << MVT::getValueTypeString(Op.getValueType())
//! Lower f32 (and v4f32) FDIV.
/*!
  The SPU has no divide instruction; A/B is computed from a hardware
  reciprocal estimate of B (FPRecipEst) refined by FPInterp, followed by
  one Newton-Raphson-style correction step:
    BRcpl   = fi(B, frest(B))         -- refined 1/B estimate
    AxBRcpl = A * BRcpl               -- first-order quotient
    result  = AxBRcpl + BRcpl * (A - B * AxBRcpl)
 */
static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDOperand A = Op.getOperand(0);
  SDOperand B = Op.getOperand(1);
  unsigned VT = Op.getValueType();   // MVT::f32 or a vector float type

  unsigned VRegBR, VRegC;            // hold BRcpl and A*BRcpl respectively

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  // (else branch: vector float case uses the 128-bit vector register class)
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // (Floating Interpolate (FP Reciprocal Estimate B))
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()

  // Newton-Raphson correction: AxBRcpl + BRcpl * (A - B * AxBRcpl)
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Lower ISD::EXTRACT_VECTOR_ELT with a constant index.
/*!
  Element 0 of i32/i64 vectors already sits in the SPU's "preferred slot"
  and is extracted directly; any other element is rotated into the
  preferred slot via SHUFB with a synthesized byte-shuffle mask, then
  extracted with EXTRACT_ELT0.  Only constant indices are supported.
 */
static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  SDOperand N = Op.getOperand(0);
  SDOperand Elt = Op.getOperand(1);
  SDOperand ShufMask[16];            // one byte-select constant per lane
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getValue();

  // Range-check the element index per element width.
  // NOTE(review): the i32/i64 message texts say "> 4"/"> 2" but the tests
  // fire for >= 4 / >= 2; messages should read "> 3" / "> 1".
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);

  // Need to generate shuffle mask and extract:
  // prefslot_begin/end are byte offsets of the preferred slot for this VT.
  int prefslot_begin = -1, prefslot_end = -1;
  int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;

  // (per-VT cases follow; labels not visible in this chunk)
      prefslot_begin = prefslot_end = 3;

      prefslot_begin = 2; prefslot_end = 3;

      prefslot_begin = 0; prefslot_end = 3;

      prefslot_begin = 0; prefslot_end = 7;

  assert(prefslot_begin != -1 && prefslot_end != -1 &&
         "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

  for (int i = 0; i < 16; ++i) {
    // zero fill uppper part of preferred slot, don't care about the
    unsigned int mask_val;

    if (i <= prefslot_end) {
    // Bytes within the preferred slot select the wanted element's bytes;
    // bytes before it are filled (line not visible), after it replicate.
        ((i < prefslot_begin)
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);

      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];

  SDOperand ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  // Shuffle the wanted element into the preferred slot, then extract it.
  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
//! Lower ISD::INSERT_VECTOR_ELT with a constant index.
/*!
  Scalar-to-vector promotes the value, then a SHUFB merges it into the
  original vector using an INSERT_MASK (the SPU's cwd/chd/cbd-style
  "controls for insertion" mask) computed from the constant element index.
 */
static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  SDOperand VecOp = Op.getOperand(0);
  SDOperand ValOp = Op.getOperand(1);
  SDOperand IdxOp = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  // NOTE(review): cast<> asserts on failure, so CN can never be null here;
  // the assert below is effectively dead (dyn_cast<> would make it live).
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);

    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                // Insertion mask addressed at PtrBase + element offset:
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        DAG.getConstant(CN->getValue(),
//! Lower 8-bit arithmetic that the SPU lacks native support for.
/*!
  \param Op  the i8 operation to lower
  \param DAG the current SelectionDAG
  \param Opc the ISD opcode to emit at i16 width

  Strategy: widen both operands to i16 (sign- or zero-extend depending on
  the operation; constants are re-emitted directly at i16), perform the
  operation at 16 bits, and truncate the result back to i8.
  NOTE(review): the per-opcode `case` labels are not visible in this chunk;
  the extension kind of each fragment indicates which opcode it serves.
 */
static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);

  // default:
    assert(0 && "Unhandled i8 math operator");

  // 8-bit subtraction: Promote the arguments up to 16-bits and truncate

    SDOperand N1 = Op.getOperand(1);
    // Sign-extend both operands (SUB is sign-agnostic but needs full bits):
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

  // (rotate fragment) — the value is OR'd with itself shifted left 8 so
  // both bytes of the halfword hold it, making a 16-bit rotate act like
  // an 8-bit rotate; presumably the ROTL/ROTR case — TODO confirm.
    SDOperand N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    SDOperand ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));

  // (zero-extend fragment — logical shift, SRL/SHL style opcode)
    SDOperand N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

  // (sign-extend fragment — arithmetic shift, SRA style opcode)
    SDOperand N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

  // (sign-extend fragment — presumably the MUL case; TODO confirm)
    SDOperand N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
    N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
//! Lower byte immediate operations for v16i8 vectors:
/*!
  Targets the SPU's byte-immediate logical forms (ANDBI/ORBI/XORBI): if one
  operand of the logical op is a splatted constant BUILD_VECTOR (possibly
  behind a BIT_CONVERT, on either side), the vector is replaced with a
  BUILD_VECTOR of 16 identical i8 target constants so instruction selection
  can fold it into the immediate form.
 */
LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();

  // Assume the constant is operand 0 first...
  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    // ...otherwise try operand 1 as the constant side:
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);

  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;

    // Only rewrite when the whole vector is a single splatted element:
    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                           SplatBits, SplatUndef, SplatSize)) {
      SDOperand tcVec[16];
      // Low byte of the splat value becomes the instruction immediate:
      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)

      // Re-emit the original logical opcode with the constant-folded vector:
      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
//! Lower i32 multiplication
/*!
  Scalar analogue of the v4i32 path in LowerVectorMUL: the full 32-bit
  product is assembled from the SPU's 16-bit multiplies as
  MPYH(rA,rB) + MPYH(rB,rA) + MPYU(rA,rB).
 */
static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,

  // default: unsupported scalar type for this lowering.
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << MVT::getValueTypeString(Op.getValueType())

  // case MVT::i32 fragment:
    SDOperand rA = Op.getOperand(0);
    SDOperand rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.

  The scalar is promoted to a full vector, CNTB produces per-byte counts,
  and (for i16/i32) shift+add folding sums the byte counts; the final mask
  keeps only the meaningful low bits.
 */
static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  // Vector type with the same element width filling a 128-bit register:
  unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

  // --- i8 fragment: CNTB of a single byte is already the answer ---
    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);

  // --- i16 fragment: sum the two byte counts ---
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
    SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);   // max popcount of 16 bits fits in 5 bits; 0x0f masks byte-sum residue
    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    // (count >> 8) + count, masked to the low byte-sum:
    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,

  // --- i32 fragment: two shift+add folds sum the four byte counts ---
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDOperand N = Op.getOperand(0);
    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);

    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDOperand CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDOperand CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    // First fold: add the upper 16 bits' byte counts onto the lower 16:
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDOperand Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    // Second fold: add the remaining adjacent byte counts:
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),

      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    // Low byte of the accumulated sums is the final population count:
    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
/// LowerOperation - Provide custom lowering hooks for some operations.
///
/// Dispatches each custom-lowered ISD opcode to its Lower* helper above.
/// NOTE(review): most `case` labels are not visible in this chunk; each
/// return line indicates the opcode(s) it serves.
SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
{
  switch (Op.getOpcode()) {
  // default: opcode was marked Custom but has no lowering here — report it.
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
    cerr << "*Op.Val:\n";

    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());

    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());

    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);

    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());

    return LowerRET(Op, DAG, getTargetMachine());

  // i8-typed arithmetic (SUB/ROTL/SRL/SHL/SRA cases) goes to LowerI8Math:
    return LowerI8Math(Op, DAG, Op.getOpcode());

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
    if (MVT::isVector(Op.getValueType()))
      return LowerVectorMUL(Op, DAG);
    else if (Op.getValueType() == MVT::i8)
      return LowerI8Math(Op, DAG, Op.getOpcode());

      return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());

    if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
//    else if (Op.getValueType() == MVT::f64)
//      return LowerFDIVf64(Op, DAG);

    assert(0 && "Calling FDIV on unsupported MVT");

    return LowerCTPOP(Op, DAG);
2511 //===----------------------------------------------------------------------===//
2512 // Other Lowering Code
2513 //===----------------------------------------------------------------------===//
// Custom machine-instruction lowering hook: emit target code for MI at the
// end of BB.  NOTE(review): return-type line and body are not visible in
// this chunk.
SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB)
2522 //===----------------------------------------------------------------------===//
2523 // Target Optimization Hooks
2524 //===----------------------------------------------------------------------===//
// Target DAG-combine hook: peephole-simplifies SPU shift/rotate nodes.
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;

  SDOperand N0 = N->getOperand(0); // everything has at least one operand

  switch (N->getOpcode()) {

  // Look for obvious optimizations for shift left:
  // a) Replace 0 << V with 0
  // b) Replace V << 0 with V
  //
  // N.B: llvm will generate an undef node if the shift amount is greater than
  // 15 (e.g.: V << 16), which will naturally trigger an assert.

  case SPU::SHLQBIIvec:

  case SPU::ROTHIr16_i32:

  case SPU::ROTIr32_i16:
  case SPU::ROTQBYIvec:
  case SPU::ROTQBYBIvec:
  case SPU::ROTQBIIvec:
  case SPU::ROTHMIr16:

  case SPU::ROTQMBYIvec: {
    // NOTE(review): cast<> never yields null, so these `if (C = cast<...>)`
    // guards are always taken; dyn_cast<> would be the idiomatic form, but
    // the preceding opcode check keeps the cast safe.
    if (N0.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
        if (C->getValue() == 0) // 0 << V -> 0.

    SDOperand N1 = N->getOperand(1);
    if (N1.getOpcode() == ISD::Constant) {
      if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
        if (C->getValue() == 0) // V << 0 -> V
2577 //===----------------------------------------------------------------------===//
2578 // Inline Assembly Support
2579 //===----------------------------------------------------------------------===//
2581 /// getConstraintType - Given a constraint letter, return the type of
2582 /// constraint it is for this target.
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
///
/// Single-letter register-class constraints map to C_RegisterClass; all
/// other constraints defer to the TargetLowering base class.
/// NOTE(review): the individual `case` letters are not visible in this chunk.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {

      return C_RegisterClass;

  return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm constraint (plus value type) to an SPU
/// register class; falls back to the base-class handling otherwise.
/// NOTE(review): the `case` letters and some type guards are not visible in
/// this chunk; classes chosen are R64C/R32C (integer), R32FP/R64FP (float),
/// and GPRC as the general fallback.
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {

      return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);

      return std::make_pair(0U, SPU::R32FPRegisterClass);
    else if (VT == MVT::f64)
      return std::make_pair(0U, SPU::R64FPRegisterClass);

      return std::make_pair(0U, SPU::GPRCRegisterClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Report known-zero/known-one bits for SPU-specific target nodes to the DAG
// combiner.  NOTE(review): return-type line, the KnownOne/Mask parameters,
// and the body are not visible in this chunk.
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  uint64_t &KnownZero,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
// LowerAsmOperandForConstraint
//
// Lowers an inline-asm operand for the given single-letter constraint.
// Currently defers entirely to the TargetLowering base-class implementation.
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // SPU's addresses are 256K:
  // i.e., the offset must fit the local store's 18-bit signed range
  // (roughly -2^18 .. 2^18, exclusive at both ends as written).
  return (V > -(1 << 18) && V < (1 << 18) - 1);
2653 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {