1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// File-scope opcode -> printable-name table, populated lazily on the first
// call to getTargetNodeName() (see below).
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
// Each entry associates a scalar value type with the byte offset of its
// "preferred slot" within a 16-byte SPU register/quadword; used by
// AlignedLoad() to compute rotation amounts.
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
// Table of supported value types (initializer not visible in this view).
46 const valtype_map_s valtype_map[] = {
// Number of entries, via the classic sizeof-array idiom.
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for the entry matching VT.
// Returns a pointer into valtype_map, or 0 (with a diagnostic) when VT has
// no mapping — callers must be prepared for a null result.
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// Diagnostic printed when no entry was found (guard lines elided in this view).
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
// Matches both the generic and the Target* flavors of each address node
// kind, plus SPU's own A-form address node.
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
// "Indirect" here means the address arrives in a register or is the result
// of a previous load (SPUISD::LDRESULT), rather than a symbolic address.
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
// Constructor: configures the whole SPU lowering policy — register classes,
// per-(opcode, type) legalization actions (Legal / Promote / Expand / Custom),
// setcc/shift conventions, and the target DAG-combine hooks.
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 // NOTE: i8 register class is not registered because we cannot determine when
126 // we need to zero or sign extend for custom-lowered loads and stores.
127 // NOTE: Ignore the previous note. For now. :-)
128 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
129 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
130 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
131 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
132 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
133 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
134 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
140 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
142 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
143 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
144 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
146 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
147 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
148 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
149 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
151 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
152 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
153 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
155 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
156 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
157 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
159 // SPU constant load actions are custom lowered:
160 setOperationAction(ISD::Constant, MVT::i64, Custom);
161 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
162 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
164 // SPU's loads and stores have to be custom lowered:
165 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
167 setOperationAction(ISD::LOAD, sctype, Custom);
168 setOperationAction(ISD::STORE, sctype, Custom);
171 // Custom lower BRCOND for i1, i8 to "promote" the result to
172 // i32 and i16, respectively.
173 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175 // Expand the jumptable branches
176 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
177 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
180 // SPU has no intrinsics for these particular operations:
181 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
182 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
183 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
184 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
186 // SPU has no SREM/UREM instructions (comment previously said "PowerPC" —
186 // stale copy/paste from the PPC backend)
187 setOperationAction(ISD::SREM, MVT::i32, Expand);
188 setOperationAction(ISD::UREM, MVT::i32, Expand);
189 setOperationAction(ISD::SREM, MVT::i64, Expand);
190 setOperationAction(ISD::UREM, MVT::i64, Expand);
192 // We don't support sin/cos/sqrt/fmod
193 setOperationAction(ISD::FSIN , MVT::f64, Expand);
194 setOperationAction(ISD::FCOS , MVT::f64, Expand);
195 setOperationAction(ISD::FREM , MVT::f64, Expand);
196 setOperationAction(ISD::FSIN , MVT::f32, Expand);
197 setOperationAction(ISD::FCOS , MVT::f32, Expand);
198 setOperationAction(ISD::FREM , MVT::f32, Expand);
200 // If we're enabling GP optimizations, use hardware square root
201 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
202 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
205 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
207 // SPU can do rotate right and left, so legalize it... but customize for i8
208 // because instructions don't exist.
209 setOperationAction(ISD::ROTR, MVT::i32, Legal);
210 setOperationAction(ISD::ROTR, MVT::i16, Legal);
211 setOperationAction(ISD::ROTR, MVT::i8, Custom);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // And SPU needs custom lowering for shift left/right for i64
220 setOperationAction(ISD::SHL, MVT::i64, Custom);
221 setOperationAction(ISD::SRL, MVT::i64, Custom);
222 setOperationAction(ISD::SRA, MVT::i64, Custom);
224 // Custom lower i32 multiplications
225 setOperationAction(ISD::MUL, MVT::i32, Custom);
227 // Need to custom handle (some) common i8 math ops
228 setOperationAction(ISD::SUB, MVT::i8, Custom);
229 setOperationAction(ISD::MUL, MVT::i8, Custom);
231 // SPU does not have BSWAP. It does have i32 support CTLZ.
232 // CTPOP has to be custom lowered.
233 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
234 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
236 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
237 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
238 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
239 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
241 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
242 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
244 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
246 // SPU does not have select or setcc
247 setOperationAction(ISD::SELECT, MVT::i1, Expand);
248 setOperationAction(ISD::SELECT, MVT::i8, Expand);
249 setOperationAction(ISD::SELECT, MVT::i16, Expand);
250 setOperationAction(ISD::SELECT, MVT::i32, Expand);
251 setOperationAction(ISD::SELECT, MVT::i64, Expand);
252 setOperationAction(ISD::SELECT, MVT::f32, Expand);
253 setOperationAction(ISD::SELECT, MVT::f64, Expand);
255 // Zero extension and sign extension for i64 have to be
257 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
258 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
259 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
261 // SPU has a legal FP -> signed INT instruction
262 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
263 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
264 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
265 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
267 // FDIV on SPU requires custom lowering
268 setOperationAction(ISD::FDIV, MVT::f32, Custom);
269 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
271 // SPU has [U|S]INT_TO_FP
272 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
273 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
274 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
275 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
276 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
277 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
278 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
279 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
281 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
282 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
283 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
284 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
286 // We cannot sextinreg(i1). Expand to shifts.
287 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
289 // Support label based line numbers.
290 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
291 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
293 // We want to legalize GlobalAddress and ConstantPool nodes into the
294 // appropriate instructions to materialize the address.
295 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
297 setOperationAction(ISD::GlobalAddress, sctype, Custom);
298 setOperationAction(ISD::ConstantPool, sctype, Custom);
299 setOperationAction(ISD::JumpTable, sctype, Custom);
302 // RET must be custom lowered, to meet ABI requirements
303 setOperationAction(ISD::RET, MVT::Other, Custom);
305 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
306 setOperationAction(ISD::VASTART , MVT::Other, Custom);
308 // Use the default implementation.
309 setOperationAction(ISD::VAARG , MVT::Other, Expand);
310 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
311 setOperationAction(ISD::VAEND , MVT::Other, Expand);
312 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
313 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
314 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
315 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
317 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): the next two lines duplicate the i64 FP_TO_SINT/SINT_TO_FP
// Custom settings already made above (lines 263/278) — redundant but harmless.
318 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
319 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
321 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this OVERRIDES the earlier "FP_TO_UINT, MVT::i32, Legal"
// at line 264 — the last setting wins; confirm which was intended.
322 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
324 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
325 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
327 // First set operation action for all vector types to expand. Then we
328 // will selectively turn on ones that can be effectively codegen'd.
329 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
330 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
331 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
332 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
333 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
334 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
336 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
337 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
338 // add/sub are legal for all supported vector VT's.
339 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
340 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
341 // mul has to be custom lowered.
342 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
344 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
345 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
346 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
347 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
348 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
349 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
351 // These operations need to be expanded:
352 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
353 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
354 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
355 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
356 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
358 // Custom lower build_vector, constant pool spills, insert and
359 // extract vector elements:
360 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
361 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
362 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
363 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
364 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
365 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8-specific overrides of the generic vector settings made in the loop.
368 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
369 setOperationAction(ISD::AND, MVT::v16i8, Custom);
370 setOperationAction(ISD::OR, MVT::v16i8, Custom);
371 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
372 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
374 setSetCCResultType(MVT::i32);
375 setShiftAmountType(MVT::i32);
376 setSetCCResultContents(ZeroOrOneSetCCResult);
378 setStackPointerRegisterToSaveRestore(SPU::R1);
380 // We have target-specific dag combine patterns for the following nodes:
381 setTargetDAGCombine(ISD::ADD);
382 setTargetDAGCombine(ISD::ZERO_EXTEND);
383 setTargetDAGCombine(ISD::SIGN_EXTEND);
384 setTargetDAGCombine(ISD::ANY_EXTEND);
386 computeRegisterProperties();
// Returns a human-readable name for an SPU-specific DAG node opcode, or 0
// when the opcode is unknown.
// NOTE(review): lazily fills the file-scope `node_names` map from this const
// method with no visible synchronization — the first call is not thread-safe.
390 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
392 if (node_names.empty()) {
393 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
394 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
395 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
396 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
397 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
398 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
399 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
400 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
401 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
402 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
403 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
404 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
405 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
406 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
407 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
408 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
409 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
410 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
411 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
412 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
413 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
414 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
415 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
416 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
417 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
418 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
419 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
420 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
421 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
422 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
423 "SPUISD::ROTQUAD_RZ_BYTES";
424 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
425 "SPUISD::ROTQUAD_RZ_BITS";
426 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
427 "SPUISD::ROTBYTES_RIGHT_S";
428 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
429 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
430 "SPUISD::ROTBYTES_LEFT_CHAINED";
431 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
432 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
433 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
434 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
435 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Lookup; unknown opcodes yield a null pointer.
438 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
440 return ((i != node_names.end()) ? i->second : 0);
443 //===----------------------------------------------------------------------===//
444 // Calling convention code:
445 //===----------------------------------------------------------------------===//
447 #include "SPUGenCallingConv.inc"
449 //===----------------------------------------------------------------------===//
450 // LowerOperation implementation
451 //===----------------------------------------------------------------------===//
453 /// Aligned load common code for CellSPU
455 \param[in] Op The SelectionDAG load or store operand
456 \param[in] DAG The selection DAG
457 \param[in] ST CellSPU subtarget information structure
458 \param[in,out] alignment Caller initializes this to the load or store node's
459 value from getAlignment(), may be updated while generating the aligned load
460 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
461 offset (divisible by 16, modulo 16 == 0)
462 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
463 offset of the preferred slot (modulo 16 != 0)
464 \param[in,out] VT Caller initializes this value type to the the load or store
465 node's loaded or stored value type; may be updated if an i1-extended load or
467 \param[out] was16aligned true if the base pointer had 16-byte alignment,
468 otherwise false. Can help to determine if the chunk needs to be rotated.
470 Both load and store lowering load a block of data aligned on a 16-byte
471 boundary. This is the common aligned load code shared between both.
474 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
476 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
477 MVT::ValueType &VT, bool &was16aligned)
479 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// NOTE(review): vtm is dereferenced below (vtm->prefslot_byte) without a null
// check, but getValueTypeMapEntry can return 0 for unmapped VTs — confirm
// all callers pass mapped types.
480 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
481 SDOperand basePtr = LSN->getBasePtr();
482 SDOperand chain = LSN->getChain();
// Fold a constant offset on an (ADD base, const) pointer into
// alignOffs/prefSlotOffs and strip the ADD.
484 if (basePtr.getOpcode() == ISD::ADD) {
485 SDOperand Op1 = basePtr.Val->getOperand(1);
487 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
488 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
490 alignOffs = (int) CN->getValue();
491 prefSlotOffs = (int) (alignOffs & 0xf);
493 // Adjust the rotation amount to ensure that the final result ends up in
494 // the preferred slot:
495 prefSlotOffs -= vtm->prefslot_byte;
496 basePtr = basePtr.getOperand(0);
498 // Loading from memory, can we adjust alignment?
499 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
500 SDOperand APtr = basePtr.getOperand(0);
501 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
502 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
// Refine alignment from the global's declared alignment.
503 alignment = GSDN->getGlobal()->getAlignment();
508 prefSlotOffs = -vtm->prefslot_byte;
512 prefSlotOffs = -vtm->prefslot_byte;
// 16-byte-aligned path: realign the base as a D-form address and emit one
// v16i8 quadword load.
515 if (alignment == 16) {
516 // Realign the base pointer as a D-Form address:
517 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
518 basePtr = DAG.getNode(ISD::ADD, PtrVT,
520 DAG.getConstant((alignOffs & ~0xf), PtrVT));
523 // Emit the vector load:
525 return DAG.getLoad(MVT::v16i8, chain, basePtr,
526 LSN->getSrcValue(), LSN->getSrcValueOffset(),
527 LSN->isVolatile(), 16);
530 // Unaligned load or we're using the "large memory" model, which means that
531 // we have to be very pessimistic:
532 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
533 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
// Add the 16-byte-rounded offset back onto the (possibly wrapped) base.
537 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
538 DAG.getConstant((alignOffs & ~0xf), PtrVT));
539 was16aligned = false;
540 return DAG.getLoad(MVT::v16i8, chain, basePtr,
541 LSN->getSrcValue(), LSN->getSrcValueOffset(),
542 LSN->isVolatile(), 16);
545 /// Custom lower loads for CellSPU
547 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
548 within a 16-byte block, we have to rotate to extract the requested element.
// Strategy: load the whole 16-byte chunk via AlignedLoad(), rotate the
// requested element into the preferred slot if needed, extract element 0,
// then apply any i1/i8 sign/zero extension; finally wrap the value+chain in
// an SPUISD::LDRESULT node.
551 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
552 LoadSDNode *LN = cast<LoadSDNode>(Op);
553 SDOperand the_chain = LN->getChain();
554 MVT::ValueType VT = LN->getMemoryVT();
555 MVT::ValueType OpVT = Op.Val->getValueType(0);
556 ISD::LoadExtType ExtType = LN->getExtensionType();
557 unsigned alignment = LN->getAlignment();
// Only the UNINDEXED addressing mode is handled; everything else errors out.
560 switch (LN->getAddressingMode()) {
561 case ISD::UNINDEXED: {
565 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
570 the_chain = result.getValue(1);
571 // Rotate the chunk if necessary
574 if (rotamt != 0 || !was16aligned) {
575 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// 16-aligned case: rotate by a constant byte count ...
580 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
// ... otherwise rotate by (basePtr + rotamt), computed from the load's base.
582 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
583 LoadSDNode *LN1 = cast<LoadSDNode>(result);
584 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
585 DAG.getConstant(rotamt, PtrVT));
588 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
589 the_chain = result.getValue(1);
// Non-extending (or plain EXTLOAD) case: bitcast the v16i8 chunk to the
// element's natural vector type and extract element 0.
592 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
594 MVT::ValueType vecVT = MVT::v16i8;
596 // Convert the loaded v16i8 vector to the appropriate vector type
597 // specified by the operand:
600 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
602 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
605 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
606 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
607 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
608 the_chain = result.getValue(1);
610 // Handle the sign and zero-extending loads for i1 and i8:
613 if (ExtType == ISD::SEXTLOAD) {
614 NewOpC = (OpVT == MVT::i1
615 ? SPUISD::EXTRACT_I1_SEXT
616 : SPUISD::EXTRACT_I8_SEXT);
618 assert(ExtType == ISD::ZEXTLOAD);
619 NewOpC = (OpVT == MVT::i1
620 ? SPUISD::EXTRACT_I1_ZEXT
621 : SPUISD::EXTRACT_I8_ZEXT);
624 result = DAG.getNode(NewOpC, OpVT, result);
// Package the final value and chain as an SPUISD::LDRESULT.
627 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
628 SDOperand retops[2] = {
633 result = DAG.getNode(SPUISD::LDRESULT, retvts,
634 retops, sizeof(retops) / sizeof(retops[0]));
// Unsupported (indexed) addressing modes: report and fall through to error.
641 case ISD::LAST_INDEXED_MODE:
642 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
644 cerr << (unsigned) LN->getAddressingMode() << "\n";
652 /// Custom lower stores for CellSPU
654 All CellSPU stores are aligned to 16-byte boundaries, so for elements
655 within a 16-byte block, we have to generate a shuffle to insert the
656 requested element into its place, then store the resulting block.
// Strategy: read-modify-write — load the containing 16-byte chunk via
// AlignedLoad(), shuffle the new value into its slot with an INSERT_MASK +
// SHUFB, and store the whole chunk back.
659 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
660 StoreSDNode *SN = cast<StoreSDNode>(Op);
661 SDOperand Value = SN->getValue();
662 MVT::ValueType VT = Value.getValueType();
663 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
664 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
665 unsigned alignment = SN->getAlignment();
// Only UNINDEXED stores are handled; indexed modes fall to the error path.
667 switch (SN->getAddressingMode()) {
668 case ISD::UNINDEXED: {
669 int chunk_offset, slot_offset;
672 // The vector type we really want to load from the 16-byte chunk, except
673 // in the case of MVT::i1, which has to be v16i8.
674 unsigned vecVT, stVecVT = MVT::v16i8;
677 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
678 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// Load the chunk that contains the store destination.
680 SDOperand alignLoadVec =
681 AlignedLoad(Op, DAG, ST, SN, alignment,
682 chunk_offset, slot_offset, VT, was16aligned);
684 if (alignLoadVec.Val == 0)
687 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
688 SDOperand basePtr = LN->getBasePtr();
689 SDOperand the_chain = alignLoadVec.getValue(1);
690 SDOperand theValue = SN->getValue();
// Strip AssertZext/AssertSext wrappers to get the underlying value.
694 && (theValue.getOpcode() == ISD::AssertZext
695 || theValue.getOpcode() == ISD::AssertSext)) {
696 // Drill down and get the value for zero- and sign-extended
698 theValue = theValue.getOperand(0);
// Compute the pointer fed to INSERT_MASK for the element's position.
703 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
704 SDOperand insertEltPtr;
705 SDOperand insertEltOp;
707 // If the base pointer is already a D-form address, then just create
708 // a new D-form address with a slot offset and the orignal base pointer.
709 // Otherwise generate a D-form address with the slot offset relative
710 // to the stack pointer, which is always aligned.
711 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
712 DEBUG(basePtr.Val->dump(&DAG));
715 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
716 (basePtr.getOpcode() == ISD::ADD
717 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
718 insertEltPtr = basePtr;
720 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
// Build the shuffle mask and merge the new value into the loaded chunk.
723 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
724 result = DAG.getNode(SPUISD::SHUFB, vecVT,
725 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
727 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
// Write the modified chunk back to memory.
729 result = DAG.getStore(the_chain, result, basePtr,
730 LN->getSrcValue(), LN->getSrcValueOffset(),
731 LN->isVolatile(), LN->getAlignment());
740 case ISD::LAST_INDEXED_MODE:
// NOTE(review): copy/paste error in the diagnostic — this is the STORE path
// handling a StoreSDNode, but the message says "LowerLOAD"/"LoadSDNode".
741 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
743 cerr << (unsigned) SN->getAddressingMode() << "\n";
751 /// Generate the address of a constant pool entry.
// Static relocation model only: small-memory code gets a direct A-form
// address; large-memory code materializes the address as Hi/Lo halves
// combined through an IndirectAddr node. Non-static models are rejected.
753 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
754 MVT::ValueType PtrVT = Op.getValueType();
755 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
756 Constant *C = CP->getConstVal();
757 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
758 SDOperand Zero = DAG.getConstant(0, PtrVT);
759 const TargetMachine &TM = DAG.getTarget();
761 if (TM.getRelocationModel() == Reloc::Static) {
762 if (!ST->usingLargeMem()) {
763 // Just return the SDOperand with the constant pool address in it.
764 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
766 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
767 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
768 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
773 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry. Same pattern as
// LowerConstantPool: A-form address for small memory, Hi/Lo + IndirectAddr
// for large memory, static relocation model only.
778 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
779 MVT::ValueType PtrVT = Op.getValueType();
780 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
781 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
782 SDOperand Zero = DAG.getConstant(0, PtrVT);
783 const TargetMachine &TM = DAG.getTarget();
785 if (TM.getRelocationModel() == Reloc::Static) {
786 if (!ST->usingLargeMem()) {
787 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
789 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
790 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
791 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
796 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global. Same pattern as LowerConstantPool /
// LowerJumpTable: A-form for small memory, Hi/Lo + IndirectAddr for large
// memory; only the static relocation model is supported.
801 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
802 MVT::ValueType PtrVT = Op.getValueType();
803 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
804 GlobalValue *GV = GSDN->getGlobal();
805 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
806 const TargetMachine &TM = DAG.getTarget();
807 SDOperand Zero = DAG.getConstant(0, PtrVT);
809 if (TM.getRelocationModel() == Reloc::Static) {
810 if (!ST->usingLargeMem()) {
811 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
813 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
814 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
815 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
818 cerr << "LowerGlobalAddress: Relocation model other than static not "
827 //! Custom lower i64 integer constants
829 This code inserts all of the necessary juggling that needs to occur to load
830 a 64-bit constant into a register.
// Only MVT::i64 is handled: the constant is splatted into a v2i64
// BUILD_VECTOR and element 0 is extracted. Other types hit the error path.
833 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
834 unsigned VT = Op.getValueType();
835 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
837 if (VT == MVT::i64) {
838 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
839 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
840 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
843 cerr << "LowerConstant: unhandled constant type "
844 << MVT::getValueTypeString(VT)
853 //! Custom lower double precision floating point constants
// f64 constants are lowered by reinterpreting the double's bit pattern as an
// i64 (DoubleToBits), materializing that via LowerConstant, and bit-casting
// the result back to f64.
855 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
856 unsigned VT = Op.getValueType();
857 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
860 "LowerConstantFP: Node is not ConstantFPSDNode");
862 if (VT == MVT::f64) {
863 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
864 return DAG.getNode(ISD::BIT_CONVERT, VT,
865 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
871 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
// Rebuilds the BRCOND with its condition zero-extended (i1 -> i32,
// i8 -> i16); any other condition type is returned unchanged.
873 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
875 SDOperand Cond = Op.getOperand(1);
876 MVT::ValueType CondVT = Cond.getValueType();
877 MVT::ValueType CondNVT;
879 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
880 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
881 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
// Note: Op.getOperand(1) here is the same node as Cond above.
883 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
886 return SDOperand(); // Unchanged
// Lower FORMAL_ARGUMENTS: for each incoming argument, either copy it out of
// the SPU argument register it arrives in (creating a virtual register and
// marking the physical register live-in), or load it from its fixed stack
// slot when registers are exhausted.  For vararg functions, remaining
// argument registers are spilled to the stack so va_start/va_next work.
// NOTE(review): this copy of the file has lines elided (the switch on
// ObjectVT, the ++ArgRegIdx/needsLoad updates, and several braces are
// missing between the visible lines).
890 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
892 MachineFunction &MF = DAG.getMachineFunction();
893 MachineFrameInfo *MFI = MF.getFrameInfo();
894 MachineRegisterInfo &RegInfo = MF.getRegInfo();
895 SmallVector<SDOperand, 8> ArgValues;
896 SDOperand Root = Op.getOperand(0);
897 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
899 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
900 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Arguments start just above the minimum stack frame (linkage area).
902 unsigned ArgOffset = SPUFrameInfo::minStackSize();
903 unsigned ArgRegIdx = 0;
904 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
906 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
908 // Add DAG nodes to load the arguments or copy them out of registers.
909 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
911 bool needsLoad = false;
912 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
913 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
917 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
918 << MVT::getValueTypeString(ObjectVT)
// Each ObjectVT case below picks the register class matching the value type
// (R8C/R16C/R32C/R64C for integers, R32FP/R64FP for floats, VECREG for
// vectors) and copies the incoming physical argument register into a fresh
// virtual register.
923 if (!isVarArg && ArgRegIdx < NumArgRegs) {
924 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
925 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
926 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
933 if (!isVarArg && ArgRegIdx < NumArgRegs) {
934 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
935 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
936 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
943 if (!isVarArg && ArgRegIdx < NumArgRegs) {
944 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
945 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
946 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
953 if (!isVarArg && ArgRegIdx < NumArgRegs) {
954 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
955 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
956 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
963 if (!isVarArg && ArgRegIdx < NumArgRegs) {
964 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
965 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
966 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
973 if (!isVarArg && ArgRegIdx < NumArgRegs) {
974 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
975 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
976 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
987 if (!isVarArg && ArgRegIdx < NumArgRegs) {
988 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
989 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
990 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
998 // We need to load the argument to a virtual register if we determined above
999 // that we ran out of physical registers of the appropriate type
1001 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1002 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1003 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1004 ArgOffset += StackSlotSize;
1007 ArgValues.push_back(ArgVal);
1010 // If the function takes variable number of arguments, make a frame index for
1011 // the start of the first vararg value... for expansion of llvm.va_start.
1013 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1015 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1016 // If this function is vararg, store any remaining integer argument regs to
1017 // their spots on the stack so that they may be loaded by dereferencing the
1018 // result of va_next.
1019 SmallVector<SDOperand, 8> MemOps;
1020 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1021 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1022 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1023 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1024 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1025 MemOps.push_back(Store);
1026 // Increment the address by four for the next argument to store
1027 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1028 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Glue all vararg spill stores into the chain via a TokenFactor.
1030 if (!MemOps.empty())
1031 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1034 ArgValues.push_back(Root);
1036 // Return the new list of results.
1037 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1038 Op.Val->value_end());
1039 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1042 /// isLSAAddress - Return the immediate to use if the specified
1043 /// value is representable as a LSA address.
// Returns the constant address shifted right by 2 (word-addressed) if it is
// word-aligned and fits in a sign-extended 16-bit field after the shift;
// returns 0 (null) otherwise, or when Op is not a constant.
1044 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1045 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1048 int Addr = C->getValue();
1049 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1050 (Addr << 14 >> 14) != Addr)
1051 return 0; // Top 14 bits have to be sext of immediate.
1053 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower a CALL node for the SPU: assign outgoing arguments to the SPU
// argument registers (spilling overflow arguments to stack slots below the
// linkage area), select the callee addressing form (PC-relative BRSL,
// absolute BRASL, or indirect for large-memory mode), emit the SPUISD::CALL
// node, and copy any return values out of R3/R4.
// NOTE(review): this copy of the file has lines elided (switch case labels,
// else-branches, and several closing braces are missing between the
// visible lines).
1058 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1059 SDOperand Chain = Op.getOperand(0);
1061 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
// NOTE(review): isTailCall is computed but no tail-call path is visible here.
1062 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1064 SDOperand Callee = Op.getOperand(4);
// CALL operands: chain, CC, isVarArg, isTailCall, callee, then (arg, flag)
// pairs -- hence (NumOperands - 5) / 2 arguments.
1065 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1066 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1067 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1068 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1070 // Handy pointer type
1071 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1073 // Accumulate how many bytes are to be pushed on the stack, including the
1074 // linkage area, and parameter passing area. According to the SPU ABI,
1075 // we minimally need space for [LR] and [SP]
1076 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1078 // Set up a copy of the stack pointer for use loading and storing any
1079 // arguments that may not fit in the registers available for argument
1081 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1083 // Figure out which arguments are going to go in registers, and which in
1085 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1086 unsigned ArgRegIdx = 0;
1088 // Keep track of registers passing arguments
1089 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1090 // And the arguments passed on the stack
1091 SmallVector<SDOperand, 8> MemOpChains;
1093 for (unsigned i = 0; i != NumOps; ++i) {
1094 SDOperand Arg = Op.getOperand(5+2*i);
1096 // PtrOff will be used to store the current argument to the stack if a
1097 // register cannot be found for it.
1098 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1099 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1101 switch (Arg.getValueType()) {
1102 default: assert(0 && "Unexpected ValueType for argument!");
// Each visible case: use the next argument register if any remain,
// otherwise store the argument to its stack slot and bump ArgOffset.
1106 if (ArgRegIdx != NumArgRegs) {
1107 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1109 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1110 ArgOffset += StackSlotSize;
1115 if (ArgRegIdx != NumArgRegs) {
1116 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1118 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1119 ArgOffset += StackSlotSize;
1126 if (ArgRegIdx != NumArgRegs) {
1127 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1129 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1130 ArgOffset += StackSlotSize;
1136 // Update number of stack bytes actually used, insert a call sequence start
1137 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1138 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1140 if (!MemOpChains.empty()) {
1141 // Adjust the stack pointer for the stack arguments.
1142 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1143 &MemOpChains[0], MemOpChains.size());
1146 // Build a sequence of copy-to-reg nodes chained together with token chain
1147 // and flag operands which copy the outgoing args into the appropriate regs.
1149 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1150 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1152 InFlag = Chain.getValue(1);
1155 std::vector<MVT::ValueType> NodeTys;
1156 NodeTys.push_back(MVT::Other); // Returns a chain
1157 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1159 SmallVector<SDOperand, 8> Ops;
1160 unsigned CallOpc = SPUISD::CALL;
1162 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1163 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1164 // node so that legalize doesn't hack it.
1165 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1166 GlobalValue *GV = G->getGlobal();
1167 unsigned CalleeVT = Callee.getValueType();
1168 SDOperand Zero = DAG.getConstant(0, PtrVT);
1169 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1171 if (!ST->usingLargeMem()) {
1172 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1173 // style calls, otherwise, external symbols are BRASL calls. This assumes
1174 // that declared/defined symbols are in the same compilation unit and can
1175 // be reached through PC-relative jumps.
1178 // This may be an unsafe assumption for JIT and really large compilation
// Declarations (no body) get an absolute A-form address; definitions get a
// PC-relative address.
1180 if (GV->isDeclaration()) {
1181 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1183 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1186 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1188 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1190 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1191 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1192 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1193 // If this is an absolute destination address that appears to be a legal
1194 // local store address, use the munged value.
1195 Callee = SDOperand(Dest, 0);
1198 Ops.push_back(Chain);
1199 Ops.push_back(Callee);
1201 // Add argument registers to the end of the list so that they are known live
1203 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1204 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1205 RegsToPass[i].second.getValueType()));
1208 Ops.push_back(InFlag);
1209 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1210 InFlag = Chain.getValue(1);
1212 Chain = DAG.getCALLSEQ_END(Chain,
1213 DAG.getConstant(NumStackBytes, PtrVT),
1214 DAG.getConstant(0, PtrVT),
1216 if (Op.Val->getValueType(0) != MVT::Other)
1217 InFlag = Chain.getValue(1);
1219 SDOperand ResultVals[3];
1220 unsigned NumResults = 0;
1223 // If the call has results, copy the values out of the ret val registers.
1224 switch (Op.Val->getValueType(0)) {
1225 default: assert(0 && "Unexpected ret value!");
1226 case MVT::Other: break;
// Two-part i32 result (e.g. an expanded 64-bit value on a 32-bit pointer
// target): low/high halves come back in R4 and R3.
1228 if (Op.Val->getValueType(1) == MVT::i32) {
1229 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1230 ResultVals[0] = Chain.getValue(0);
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1232 Chain.getValue(2)).getValue(1);
1233 ResultVals[1] = Chain.getValue(0);
1235 NodeTys.push_back(MVT::i32);
1237 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1238 ResultVals[0] = Chain.getValue(0);
1241 NodeTys.push_back(MVT::i32);
1244 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1245 ResultVals[0] = Chain.getValue(0);
1247 NodeTys.push_back(MVT::i64);
1251 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1252 InFlag).getValue(1);
1253 ResultVals[0] = Chain.getValue(0);
1255 NodeTys.push_back(Op.Val->getValueType(0));
1262 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1263 InFlag).getValue(1);
1264 ResultVals[0] = Chain.getValue(0);
1266 NodeTys.push_back(Op.Val->getValueType(0));
1270 NodeTys.push_back(MVT::Other);
1272 // If the function returns void, just return the chain.
1273 if (NumResults == 0)
1276 // Otherwise, merge everything together with a MERGE_VALUES node.
1277 ResultVals[NumResults++] = Chain;
1278 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1279 ResultVals, NumResults);
1280 return Res.getValue(Op.ResNo);
// Lower a RET node: run the SPU return-value calling convention
// (RetCC_SPU) over the returned operands, copy each value into its assigned
// physical register (gluing the copies with a flag), and emit
// SPUISD::RET_FLAG.
1284 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1285 SmallVector<CCValAssign, 16> RVLocs;
1286 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1287 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1288 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1289 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1291 // If this is the first return lowered for this function, add the regs to the
1292 // liveout set for the function.
1293 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1294 for (unsigned i = 0; i != RVLocs.size(); ++i)
1295 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1298 SDOperand Chain = Op.getOperand(0);
1301 // Copy the result values into the output registers.
1302 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1303 CCValAssign &VA = RVLocs[i];
1304 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, value, flag) tuples, so value i is at i*2+1.
1305 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1306 Flag = Chain.getValue(1);
// With a glue flag (some value was copied) vs. a void return.
1310 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1312 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1316 //===----------------------------------------------------------------------===//
1317 // Vector related lowering:
1318 //===----------------------------------------------------------------------===//
// getVecImm - If the BUILD_VECTOR node N is a splat of one ConstantSDNode
// (ignoring undef elements), return that constant; otherwise return null.
1320 static ConstantSDNode *
1321 getVecImm(SDNode *N) {
1322 SDOperand OpVal(0, 0);
1324 // Check to see if this buildvec has a single non-undef value in its elements.
1325 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1326 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1328 OpVal = N->getOperand(i);
// Two differing non-undef elements: not a splat.
1329 else if (OpVal != N->getOperand(i))
1333 if (OpVal.Val != 0) {
1334 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1339 return 0; // All UNDEF: use implicit def.; not Constant node
1342 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1343 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// constant as an SDOperand; otherwise return a null SDOperand (tail elided
// in this copy of the file).  Matches the SPU 18-bit immediate field (ila).
1345 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1346 MVT::ValueType ValueType) {
1347 if (ConstantSDNode *CN = getVecImm(N)) {
1348 uint64_t Value = CN->getValue();
// 0x3ffff == (1 << 18) - 1: largest unsigned 18-bit immediate.
1349 if (Value <= 0x3ffff)
1350 return DAG.getConstant(Value, ValueType);
1356 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1357 /// and the value fits into a signed 16-bit constant, and if so, return the
// constant.  Each branch sign-extends the low 16 bits at the relevant width
// and checks the extension reproduces the original value.
1359 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1360 MVT::ValueType ValueType) {
1361 if (ConstantSDNode *CN = getVecImm(N)) {
1362 if (ValueType == MVT::i32) {
1363 int Value = (int) CN->getValue();
1364 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1366 if (Value == SExtValue)
1367 return DAG.getConstant(Value, ValueType);
1368 } else if (ValueType == MVT::i16) {
1369 short Value = (short) CN->getValue();
1370 int SExtValue = ((int) Value << 16) >> 16;
1372 if (Value == (short) SExtValue)
1373 return DAG.getConstant(Value, ValueType);
1374 } else if (ValueType == MVT::i64) {
1375 int64_t Value = CN->getValue();
1376 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1378 if (Value == SExtValue)
1379 return DAG.getConstant(Value, ValueType);
1386 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1387 /// and the value fits into a signed 10-bit constant, and if so, return the
// constant (SPU's signed 10-bit immediate field, checked by isS10Constant).
1389 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1390 MVT::ValueType ValueType) {
1391 if (ConstantSDNode *CN = getVecImm(N)) {
1392 int Value = (int) CN->getValue();
1393 if ((ValueType == MVT::i32 && isS10Constant(Value))
1394 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1395 return DAG.getConstant(Value, ValueType);
1401 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1402 /// and the value fits into a signed 8-bit constant, and if so, return the
1405 /// @note: The incoming vector is v16i8 because that's the only way we can load
1406 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// same (a 16-bit value whose two bytes match collapses to one 8-bit splat).
1408 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1409 MVT::ValueType ValueType) {
1410 if (ConstantSDNode *CN = getVecImm(N)) {
1411 int Value = (int) CN->getValue();
1412 if (ValueType == MVT::i16
1413 && Value <= 0xffff /* truncated from uint64_t */
1414 && ((short) Value >> 8) == ((short) Value & 0xff))
1415 return DAG.getConstant(Value & 0xff, ValueType);
1416 else if (ValueType == MVT::i8
1417 && (Value & 0xff) == Value)
1418 return DAG.getConstant(Value, ValueType);
1424 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1425 /// and the value fits into a signed 16-bit constant, and if so, return the
// value shifted right by 16 -- the immediate for ILHU ("immediate load
// halfword upper"), which requires the low 16 bits to be zero.
1427 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1428 MVT::ValueType ValueType) {
1429 if (ConstantSDNode *CN = getVecImm(N)) {
1430 uint64_t Value = CN->getValue();
1431 if ((ValueType == MVT::i32
1432 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1433 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1434 return DAG.getConstant(Value >> 16, ValueType);
1440 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splatted value as an i32 constant, if N is a constant splat.
1441 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1442 if (ConstantSDNode *CN = getVecImm(N)) {
1443 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1449 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
// Returns the splatted value as an i64 constant, if N is a constant splat.
1450 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1451 if (ConstantSDNode *CN = getVecImm(N)) {
// NOTE(review): the (unsigned) cast truncates the 64-bit constant to its
// low 32 bits before widening back to i64 -- looks like a bug (compare
// get_v4i32_imm above); verify splats with bits set above bit 31.
1452 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1458 // If this is a vector of constants or undefs, get the bits. A bit in
1459 // UndefBits is set if the corresponding element of the vector is an
1460 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1461 // zero. Return true if this is not an array of constants, false if it is.
// Packs the 128-bit BUILD_VECTOR constant into two uint64_t words, handling
// integer and f32/f64 FP elements (FP elements contribute their raw bits).
1463 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1464 uint64_t UndefBits[2]) {
1465 // Start with zero'd results.
1466 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1468 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1469 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1470 SDOperand OpVal = BV->getOperand(i);
1472 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1473 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1475 uint64_t EltBits = 0;
1476 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mask of EltBitSize ones marks this element as undef.
1477 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1478 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1480 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1481 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1482 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1483 const APFloat &apf = CN->getValueAPF();
1484 EltBits = (CN->getValueType(0) == MVT::f32
1485 ? FloatToBits(apf.convertToFloat())
1486 : DoubleToBits(apf.convertToDouble()));
1488 // Nonconstant element.
1492 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1495 //printf("%llx %llx %llx %llx\n",
1496 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1500 /// If this is a splat (repetition) of a value across the whole vector, return
1501 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1502 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1503 /// SplatSize = 1 byte.
// Works by repeatedly folding the value in half (64->32->16->8 bits) and
// checking, with undef bits masked out, whether the two halves agree;
// MinSplatBits stops the narrowing at the vector's element width.
1504 static bool isConstantSplat(const uint64_t Bits128[2],
1505 const uint64_t Undef128[2],
1507 uint64_t &SplatBits, uint64_t &SplatUndef,
1509 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1510 // the same as the lower 64-bits, ignoring undefs.
1511 uint64_t Bits64 = Bits128[0] | Bits128[1];
1512 uint64_t Undef64 = Undef128[0] & Undef128[1];
1513 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1514 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1515 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1516 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1518 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1519 if (MinSplatBits < 64) {
1521 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1523 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1524 if (MinSplatBits < 32) {
1526 // If the top 16-bits are different than the lower 16-bits, ignoring
1527 // undefs, we have an i32 splat.
1528 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1529 if (MinSplatBits < 16) {
1530 // If the top 8-bits are different than the lower 8-bits, ignoring
1531 // undefs, we have an i16 splat.
1532 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1533 // Otherwise, we have an 8-bit splat.
1534 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1535 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1541 SplatUndef = Undef16;
1548 SplatUndef = Undef32;
1554 SplatBits = Bits128[0];
1555 SplatUndef = Undef128[0];
1561 return false; // Can't be a splat if two pieces don't match.
1564 // If this is a case we can't handle, return null and let the default
1565 // expansion code take care of it. If we CAN select this case, and if it
1566 // selects to a single instruction, return Op. Otherwise, if we can codegen
1567 // this case more efficiently than a constant pool load, lower it to the
1568 // sequence of ops that should be used.
// NOTE(review): this copy of the file has lines elided (the switch over VT
// and several case labels/braces are missing between the visible lines).
1569 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1570 MVT::ValueType VT = Op.getValueType();
1571 // If this is a vector of constants or undefs, get the bits. A bit in
1572 // UndefBits is set if the corresponding element of the vector is an
1573 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1575 uint64_t VectorBits[2];
1576 uint64_t UndefBits[2];
1577 uint64_t SplatBits, SplatUndef;
1579 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1580 || !isConstantSplat(VectorBits, UndefBits,
1581 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1582 SplatBits, SplatUndef, SplatSize))
1583 return SDOperand(); // Not a constant vector, not a splat.
// v4f32 splat: build the equivalent v4i32 integer splat and bit-convert.
1588 uint32_t Value32 = SplatBits;
1589 assert(SplatSize == 4
1590 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1591 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1592 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1593 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1594 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 splat: same trick through v2i64.
1598 uint64_t f64val = SplatBits;
1599 assert(SplatSize == 8
1600 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1601 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1602 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1603 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1604 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1608 // 8-bit constants have to be expanded to 16-bits
1609 unsigned short Value16 = SplatBits | (SplatBits << 8);
1611 for (int i = 0; i < 8; ++i)
1612 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1613 return DAG.getNode(ISD::BIT_CONVERT, VT,
1614 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1617 unsigned short Value16;
1619 Value16 = (unsigned short) (SplatBits & 0xffff);
1621 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1622 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1624 for (int i = 0; i < 8; ++i) Ops[i] = T;
1625 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1628 unsigned int Value = SplatBits;
1629 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1630 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 splat: split the 64-bit value into 32-bit halves and synthesize it
// with splatted 32-bit vectors plus a SHUFB byte-shuffle.
1633 uint64_t val = SplatBits;
1634 uint32_t upper = uint32_t(val >> 32);
1635 uint32_t lower = uint32_t(val);
1640 SmallVector<SDOperand, 16> ShufBytes;
1642 bool upper_special, lower_special;
1644 // NOTE: This code creates common-case shuffle masks that can be easily
1645 // detected as common expressions. It is not attempting to create highly
1646 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1648 // Detect if the upper or lower half is a special shuffle mask pattern:
1649 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1650 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1652 // Create lower vector if not a special pattern
1653 if (!lower_special) {
1654 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1655 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1656 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1657 LO32C, LO32C, LO32C, LO32C));
1660 // Create upper vector if not a special pattern
1661 if (!upper_special) {
1662 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1663 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1664 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1665 HI32C, HI32C, HI32C, HI32C));
1668 // If either upper or lower are special, then the two input operands are
1669 // the same (basically, one of them is a "don't care")
1674 if (lower_special && upper_special) {
1675 // Unhappy situation... both upper and lower are special, so punt with
1676 // a target constant:
1677 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1678 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shufb control mask: even 32-bit slots take the upper
// half, odd slots the lower half; "special" halves are encoded with
// shufb's generator bytes (0x80 = 0x00, 0xc0 = 0xff, 0xe0 = 0x80).
1682 for (int i = 0; i < 4; ++i) {
1683 for (int j = 0; j < 4; ++j) {
1685 bool process_upper, process_lower;
1688 process_upper = (upper_special && (i & 1) == 0);
1689 process_lower = (lower_special && (i & 1) == 1);
1691 if (process_upper || process_lower) {
1692 if ((process_upper && upper == 0)
1693 || (process_lower && lower == 0))
1695 else if ((process_upper && upper == 0xffffffff)
1696 || (process_lower && lower == 0xffffffff))
1698 else if ((process_upper && upper == 0x80000000)
1699 || (process_lower && lower == 0x80000000))
1700 val = (j == 0 ? 0xe0 : 0x80);
1702 val = i * 4 + j + ((i & 1) * 16);
1704 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1708 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1709 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1710 &ShufBytes[0], ShufBytes.size()));
1712 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1713 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1714 return DAG.getNode(ISD::BIT_CONVERT, VT,
1715 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1716 Zero, Zero, Zero, Zero));
1724 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1725 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1726 /// permutation vector, V3, is monotonically increasing with one "exception"
1727 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1728 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1729 /// In either case, the net result is going to eventually invoke SHUFB to
1730 /// permute/shuffle the bytes from V1 and V2.
1732 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1733 /// control word for byte/halfword/word insertion. This takes care of a single
1734 /// element move from V2 into V1.
1736 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): lines are elided in this copy (the V2EltIdx0 assignments for
// each element type, and parts of the loop bodies, are missing).
1737 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1738 SDOperand V1 = Op.getOperand(0);
1739 SDOperand V2 = Op.getOperand(1);
1740 SDOperand PermMask = Op.getOperand(2);
1742 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1744 // If we have a single element being moved from V1 to V2, this can be handled
1745 // using the C*[DX] compute mask instructions, but the vector elements have
1746 // to be monotonically increasing with one exception element.
1747 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1748 unsigned EltsFromV2 = 0;
1750 unsigned V2EltIdx0 = 0;
1751 unsigned CurrElt = 0;
1752 bool monotonic = true;
// V2EltIdx0 is the first mask index that refers to V2 (depends on the
// number of elements, hence on the element type).
1753 if (EltVT == MVT::i8)
1755 else if (EltVT == MVT::i16)
1757 else if (EltVT == MVT::i32)
1760 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: bail out of the loop as soon as more than one element
// comes from V2 or the V1 indices stop increasing monotonically.
1762 for (unsigned i = 0, e = PermMask.getNumOperands();
1763 EltsFromV2 <= 1 && monotonic && i != e;
1766 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1769 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1771 if (SrcElt >= V2EltIdx0) {
1773 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1774 } else if (CurrElt != SrcElt) {
1781 if (EltsFromV2 == 1 && monotonic) {
1782 // Compute mask and shuffle
1783 MachineFunction &MF = DAG.getMachineFunction();
1784 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1785 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1786 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1787 // Initialize temporary register to 0
1788 SDOperand InitTempReg =
1789 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1790 // Copy register's contents as index in INSERT_MASK:
1791 SDOperand ShufMaskOp =
1792 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1793 DAG.getTargetConstant(V2Elt, MVT::i32),
1794 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1795 // Use shuffle mask in SHUFB synthetic instruction:
1796 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1798 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1799 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1801 SmallVector<SDOperand, 16> ResultMask;
1802 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1804 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1807 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1809 for (unsigned j = 0; j < BytesPerElement; ++j) {
1810 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1815 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1816 &ResultMask[0], ResultMask.size());
1817 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: a constant scalar becomes an explicit constant
// BUILD_VECTOR (which later simplifies to a vector register load); any other
// scalar is broadcast with the SPUISD::PROMOTE_SCALAR synthetic node.
1821 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1822 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1824 if (Op0.Val->getOpcode() == ISD::Constant) {
1825 // For a constant, build the appropriate constant vector, which will
1826 // eventually simplify to a vector register load.
1828 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1829 SmallVector<SDOperand, 16> ConstVecValues;
1833 // Create a constant vector:
1834 switch (Op.getValueType()) {
1835 default: assert(0 && "Unexpected constant value type in "
1836 "LowerSCALAR_TO_VECTOR");
1837 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1838 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1839 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1840 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1841 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1842 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element.
1845 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1846 for (size_t j = 0; j < n_copies; ++j)
1847 ConstVecValues.push_back(CValue);
1849 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1850 &ConstVecValues[0], ConstVecValues.size());
1852 // Otherwise, copy the value from one register to another:
1853 switch (Op0.getValueType()) {
1854 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1861 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
// Lower a vector ISD::MUL node.  The SPU has only 16-bit multiply hardware,
// so wider (and narrower) element multiplies are synthesized from partial
// products (SPUISD::MPY / MPYU / MPYH / MPYHH) combined with shifts/selects.
// NOTE(review): this extract is missing interior source lines (switch case
// labels, some declarations and closing braces); the comments below describe
// only the code that is visible.
1868 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1869 switch (Op.getValueType()) {
// v4i32 path: product = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA).
1871 SDOperand rA = Op.getOperand(0);
1872 SDOperand rB = Op.getOperand(1);
1873 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1874 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1875 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1876 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1878 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1882 // Multiply two v8i16 vectors (pipeline friendly version):
1883 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1884 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1885 // c) Use SELB to select upper and lower halves from the intermediate results
1887 // NOTE: We really want to move the FSMBI to earlier to actually get the
1888 // dual-issue. This code does manage to do this, even if it's a little on
1891 MachineFunction &MF = DAG.getMachineFunction();
1892 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1893 SDOperand Chain = Op.getOperand(0);
// NOTE(review): Chain and rA both read operand 0 here -- an ISD::MUL node
// has no chain operand, so using operand 0 as a chain looks dubious; verify.
1894 SDOperand rA = Op.getOperand(0);
1895 SDOperand rB = Op.getOperand(1);
// Virtual registers pin the FSMBI mask and the MPYHH result so the scheduler
// can hoist them (see the dual-issue note above).
1896 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1897 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1900 DAG.getCopyToReg(Chain, FSMBIreg,
1901 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1902 DAG.getConstant(0xcccc, MVT::i32)));
1905 DAG.getCopyToReg(FSMBOp, HiProdReg,
1906 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1908 SDOperand HHProd_v4i32 =
1909 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1910 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// SELB merges the low-half products (MPY) with the left-shifted high-half
// products, steered by the 0xcccc FSMBI mask built above.
1912 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1913 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1914 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1915 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1917 DAG.getConstant(16, MVT::i16))),
1918 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1921 // This M00sE is N@stI! (apologies to Monty Python)
1923 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1924 // is to break it all apart, sign extend, and reassemble the various
1925 // intermediate products.
1927 SDOperand rA = Op.getOperand(0);
1928 SDOperand rB = Op.getOperand(1);
1929 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1930 SDOperand c16 = DAG.getConstant(16, MVT::i32);
// Low-half 16-bit lane products of the byte vectors (viewed as v8i16).
1933 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1934 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1935 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1937 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1939 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1942 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1943 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// 0x2222 expands (via FSMBI) to a byte-select mask picking alternate bytes.
1945 SDOperand FSMBmask = DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1946 DAG.getConstant(0x2222, MVT::i32));
1948 SDOperand LoProdParts =
1949 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1950 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1951 LLProd, LHProd, FSMBmask));
1953 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
// Keep only the low 16 bits of each 32-bit lane of the low partial product.
1956 DAG.getNode(ISD::AND, MVT::v4i32,
1958 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1959 LoProdMask, LoProdMask,
1960 LoProdMask, LoProdMask));
// High halves of each operand, arithmetically shifted into position.
1963 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1964 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1967 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1968 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1971 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1972 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1975 SDOperand HHProd_1 =
1976 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1978 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1979 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1980 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1983 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1985 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1989 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
// Final result: OR the low and high partial products together, as v16i8.
1991 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
1992 DAG.getNode(ISD::OR, MVT::v4i32,
// Unhandled vector type: report the type (and presumably abort below).
1997 cerr << "CellSPU: Unknown vector multiplication, got "
1998 << MVT::getValueTypeString(Op.getValueType())
// Lower f32 (and v4f32 -- see the VECREG branch) FDIV via the SPU
// reciprocal-estimate sequence:
//   BRcpl   = FI(B, FREST(B))                       -- estimate of 1/B
//   AxBRcpl = A * BRcpl
//   result  = AxBRcpl + BRcpl * (A - B * AxBRcpl)   -- one refinement step
// NOTE(review): this extract is missing lines (e.g. the "else" between the
// two register-class branches and the BRcpl/AxBRcpl declarations).
2007 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2008 MachineFunction &MF = DAG.getMachineFunction();
2009 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2011 SDOperand A = Op.getOperand(0);
2012 SDOperand B = Op.getOperand(1);
2013 unsigned VT = Op.getValueType();
// Temporaries for the reciprocal (VRegBR) and A*reciprocal (VRegC) values;
// scalar f32 uses the FP register class, otherwise the vector class.
2015 unsigned VRegBR, VRegC;
2017 if (VT == MVT::f32) {
2018 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2019 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2021 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2022 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2024 // TODO: make sure we're feeding FPInterp the right arguments
2025 // Right now: fi B, frest(B)
2028 // (Floating Interpolate (FP Reciprocal Estimate B))
2030 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2031 DAG.getNode(SPUISD::FPInterp, VT, B,
2032 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2034 // Computes A * BRcpl and stores in a temporary register
2036 DAG.getCopyToReg(BRcpl, VRegC,
2037 DAG.getNode(ISD::FMUL, VT, A,
2038 DAG.getCopyFromReg(BRcpl, VRegBR, VT)))
2039 // What's the Chain variable do? It's magic!
2040 // TODO: set Chain = Op(0).getEntryNode()
// result = AxBRcpl + BRcpl*(A - B*AxBRcpl): correct the residual error of
// the reciprocal estimate.
2042 return DAG.getNode(ISD::FADD, VT,
2043 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2044 DAG.getNode(ISD::FMUL, VT,
2045 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2046 DAG.getNode(ISD::FSUB, VT, A,
2047 DAG.getNode(ISD::FMUL, VT, B,
2048 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
// Lower EXTRACT_VECTOR_ELT with a constant index.  Element 0 of an i32/i64
// vector already lives in the SPU "preferred slot", so it is extracted
// directly; any other element is rotated into the preferred slot with a
// SHUFB byte mask and then extracted via SPUISD::EXTRACT_ELT0.
// NOTE(review): interior lines (the switch on VT that sets the preferred
// slot bounds, some braces) are missing from this extract.
2051 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2052 unsigned VT = Op.getValueType();
2053 SDOperand N = Op.getOperand(0);
2054 SDOperand Elt = Op.getOperand(1);
2055 SDOperand ShufMask[16];
2056 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2058 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2060 int EltNo = (int) C->getValue();
// Range-check the element index per element type.
// NOTE(review): the i32/i64 messages say "> 4" / "> 2" but the conditions
// fire at >= 4 / >= 2, so the messages overstate the legal range by one.
2063 if (VT == MVT::i8 && EltNo >= 16)
2064 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2065 else if (VT == MVT::i16 && EltNo >= 8)
2066 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2067 else if (VT == MVT::i32 && EltNo >= 4)
2068 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2069 else if (VT == MVT::i64 && EltNo >= 2)
2070 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2072 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2073 // i32 and i64: Element 0 is the preferred slot
2074 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2077 // Need to generate shuffle mask and extract:
2078 int prefslot_begin = -1, prefslot_end = -1;
2079 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte ranges per type (set by the elided switch on VT).
2083 prefslot_begin = prefslot_end = 3;
2087 prefslot_begin = 2; prefslot_end = 3;
2091 prefslot_begin = 0; prefslot_end = 3;
2095 prefslot_begin = 0; prefslot_end = 7;
2100 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2101 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask: bytes inside the preferred slot select the
// requested element's bytes; the remainder repeats the pattern (don't care).
2103 for (int i = 0; i < 16; ++i) {
2104 // zero fill uppper part of preferred slot, don't care about the
2106 unsigned int mask_val;
2108 if (i <= prefslot_end) {
2110 ((i < prefslot_begin)
2112 : elt_byte + (i - prefslot_begin));
2114 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2116 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2119 SDOperand ShufMaskVec =
2120 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2122 sizeof(ShufMask) / sizeof(ShufMask[0]));
// Shuffle the element into the preferred slot, then extract it.
2124 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2125 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2126 N, N, ShufMaskVec));
// Lower INSERT_VECTOR_ELT with a constant index: promote the scalar to a
// vector and SHUFB it into VecOp using an INSERT_MASK computed from a
// 16-byte-aligned base register plus the element's byte offset.
// NOTE(review): the tail of this function (closing parens/braces and part of
// the SHUFB operand list) is missing from this extract.
2130 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2131 SDOperand VecOp = Op.getOperand(0);
2132 SDOperand ValOp = Op.getOperand(1);
2133 SDOperand IdxOp = Op.getOperand(2);
2134 MVT::ValueType VT = Op.getValueType();
2136 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2137 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2139 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2140 // Use $2 because it's always 16-byte aligned and it's available:
2141 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// SHUFB merges the scalar-to-vector value into VecOp under the insert mask.
2144 DAG.getNode(SPUISD::SHUFB, VT,
2145 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2147 DAG.getNode(SPUISD::INSERT_MASK, VT,
2148 DAG.getNode(ISD::ADD, PtrVT,
2150 DAG.getConstant(CN->getValue(),
// Lower i8 arithmetic that SPU cannot do natively: each case widens the
// operands to i16 (sign- or zero-extending as appropriate for the opcode,
// constants are re-materialized at i16), performs Opc at i16, and truncates
// the result back to i8.
// NOTE(review): the case labels of the switch are missing from this extract,
// so which opcode each arm handles is inferred only from the extension kind.
2156 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2158 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2160 assert(Op.getValueType() == MVT::i8);
2163 assert(0 && "Unhandled i8 math operator");
2167 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Both operands sign-extended; the i16 op's low byte equals the i8 result.
2169 SDOperand N1 = Op.getOperand(1);
2170 N0 = (N0.getOpcode() != ISD::Constant
2171 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2172 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2173 N1 = (N1.getOpcode() != ISD::Constant
2174 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2175 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2176 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2177 DAG.getNode(Opc, MVT::i16, N0, N1));
2181 SDOperand N1 = Op.getOperand(1);
2183 N0 = (N0.getOpcode() != ISD::Constant
2184 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2185 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2186 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2187 N1 = (N1.getOpcode() != ISD::Constant
2188 ? DAG.getNode(N1Opc, MVT::i16, N1)
2189 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
// ExpandArg replicates the byte into both halves (N0 | N0<<8) -- presumably
// so a 16-bit rotate behaves like an 8-bit rotate; TODO confirm (case label
// for this arm is not visible in this extract).
2190 SDOperand ExpandArg =
2191 DAG.getNode(ISD::OR, MVT::i16, N0,
2192 DAG.getNode(ISD::SHL, MVT::i16,
2193 N0, DAG.getConstant(8, MVT::i16)));
2194 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2195 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Zero-extend arm (logical shift style: value zero-extended, shift amount
// normalized to i16).
2199 SDOperand N1 = Op.getOperand(1);
2201 N0 = (N0.getOpcode() != ISD::Constant
2202 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2203 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2204 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2205 N1 = (N1.getOpcode() != ISD::Constant
2206 ? DAG.getNode(N1Opc, MVT::i16, N1)
2207 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2208 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2209 DAG.getNode(Opc, MVT::i16, N0, N1));
// Sign-extend arm (arithmetic shift style).
2212 SDOperand N1 = Op.getOperand(1);
2214 N0 = (N0.getOpcode() != ISD::Constant
2215 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2216 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2217 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2218 N1 = (N1.getOpcode() != ISD::Constant
2219 ? DAG.getNode(N1Opc, MVT::i16, N1)
2220 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2221 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2222 DAG.getNode(Opc, MVT::i16, N0, N1));
// Sign-extend arm (multiply style: low byte of the i16 product is exact).
2225 SDOperand N1 = Op.getOperand(1);
2227 N0 = (N0.getOpcode() != ISD::Constant
2228 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2229 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2230 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2231 N1 = (N1.getOpcode() != ISD::Constant
2232 ? DAG.getNode(N1Opc, MVT::i16, N1)
2233 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2234 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2235 DAG.getNode(Opc, MVT::i16, N0, N1));
// Lower i64 operations by promoting the scalar to a 128-bit vector,
// performing the work with SPU quadword shift/rotate nodes, and extracting
// element 0 of the result.  Visible arms handle zero/sign/any-extend from
// i32, SHL, and SRL.
// NOTE(review): this extract is missing interior lines (switch case labels,
// some operands and closing braces).
2243 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2245 MVT::ValueType VT = Op.getValueType();
// VecVT is the 128-bit vector type whose element type is VT.
2247 MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2249 SDOperand Op0 = Op.getOperand(0);
2252 case ISD::ZERO_EXTEND:
2253 case ISD::SIGN_EXTEND:
2254 case ISD::ANY_EXTEND: {
2255 MVT::ValueType Op0VT = Op0.getValueType();
2257 MVT::getVectorType(Op0VT, (128 / MVT::getSizeInBits(Op0VT)));
2259 assert(Op0VT == MVT::i32
2260 && "CellSPU: Zero/sign extending something other than i32");
// Sign-extend uses an arithmetic byte rotate; zero/any-extend uses a
// zero-filling quadword byte rotate.
2262 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2263 ? SPUISD::ROTBYTES_RIGHT_S
2264 : SPUISD::ROTQUAD_RZ_BYTES)
2265 SDOperand PromoteScalar =
2266 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
// Rotate by 4 bytes so the 32-bit value lands in the i64 preferred slot.
2268 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2269 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2270 DAG.getNode(NewOpc, Op0VecVT,
2272 DAG.getConstant(4, MVT::i32))));
// SHL arm: mask the lower quadword, then shift by whole bytes (amt >> 3)
// followed by the residual bits (amt & 7).
2276 SDOperand ShiftAmt = Op.getOperand(1);
2277 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2278 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2279 SDOperand MaskLower =
2280 DAG.getNode(SPUISD::SELB, VecVT,
2282 DAG.getConstant(0, VecVT),
2283 DAG.getNode(SPUISD::FSMBI, VecVT,
2284 DAG.getConstant(0xff00ULL, MVT::i16)));
2285 SDOperand ShiftAmtBytes =
2286 DAG.getNode(ISD::SRL, ShiftAmtVT,
2288 DAG.getConstant(3, ShiftAmtVT));
2289 SDOperand ShiftAmtBits =
2290 DAG.getNode(ISD::AND, ShiftAmtVT,
2292 DAG.getConstant(7, ShiftAmtVT));
2294 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2295 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2296 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2297 MaskLower, ShiftAmtBytes),
// SRL arm: same byte/bit decomposition using zero-filling quadword rotates.
2302 unsigned VT = unsigned(Op.getValueType());
2303 SDOperand ShiftAmt = Op.getOperand(1);
2304 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2305 SDOperand ShiftAmtBytes =
2306 DAG.getNode(ISD::SRL, ShiftAmtVT,
2308 DAG.getConstant(3, ShiftAmtVT));
2309 SDOperand ShiftAmtBits =
2310 DAG.getNode(ISD::AND, ShiftAmtVT,
2312 DAG.getConstant(7, ShiftAmtVT));
2314 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2315 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2316 Op0, ShiftAmtBytes),
2324 //! Lower byte immediate operations for v16i8 vectors:
// If one operand is a BUILD_VECTOR (possibly behind a BIT_CONVERT) that
// splats a single byte constant, rebuild it out of target constants so the
// instruction selector can use the byte-immediate forms (e.g. ANDBI/ORBI/
// XORBI).  Otherwise the node is left for default handling.
// NOTE(review): some lines (declarations of ConstVec/Arg/SplatSize, a few
// braces, the fall-through return) are missing from this extract.
2326 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2329 MVT::ValueType VT = Op.getValueType();
// Try operand 0 as the constant vector first; if it is not a BUILD_VECTOR
// (possibly behind a BIT_CONVERT), swap and try operand 1.
2331 ConstVec = Op.getOperand(0);
2332 Arg = Op.getOperand(1);
2333 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2334 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2335 ConstVec = ConstVec.getOperand(0);
2337 ConstVec = Op.getOperand(1);
2338 Arg = Op.getOperand(0);
2339 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2340 ConstVec = ConstVec.getOperand(0);
2345 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2346 uint64_t VectorBits[2];
2347 uint64_t UndefBits[2];
2348 uint64_t SplatBits, SplatUndef;
// Only rewrite when the build vector is a constant splat of one element.
2351 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2352 && isConstantSplat(VectorBits, UndefBits,
2353 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2354 SplatBits, SplatUndef, SplatSize)) {
2355 SDOperand tcVec[16];
2356 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2357 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2359 // Turn the BUILD_VECTOR into a set of target constants:
2360 for (size_t i = 0; i < tcVecSize; ++i)
2363 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2364 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2371 //! Lower i32 multiplication
// Same identity as the v4i32 vector case in LowerVectorMUL, applied to a
// scalar i32: rA*rB = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA), built from
// the SPU's 16-bit multiply units.
// NOTE(review): the switch header and case labels are missing from this
// extract; only the default (error) path and the i32 body are visible.
2372 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2376 cerr << "CellSPU: Unknown LowerMUL value type, got "
2377 << MVT::getValueTypeString(Op.getValueType())
2383 SDOperand rA = Op.getOperand(0);
2384 SDOperand rB = Op.getOperand(1);
2386 return DAG.getNode(ISD::ADD, MVT::i32,
2387 DAG.getNode(ISD::ADD, MVT::i32,
2388 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2389 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2390 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2397 //! Custom lowering for CTPOP (count population)
2399 Custom lowering code that counts the number ones in the input
2400 operand. SPU has such an instruction, but it counts the number of
2401 ones per byte, which then have to be accumulated.
// Strategy: promote the scalar into a vector, run SPUISD::CNTB (per-byte
// popcount), extract the scalar, then fold the per-byte counts together
// with shifts/adds sized to the operand width (i8/i16/i32 arms visible).
// NOTE(review): the switch case labels and some intermediate declarations
// (e.g. Comp1/Sum1/Comp2/Sum2) are missing from this extract.
2403 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2404 unsigned VT = Op.getValueType();
2405 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8 arm: one CNTB byte already holds the answer.
2409 SDOperand N = Op.getOperand(0);
2410 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2412 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2413 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2415 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 arm: add the two byte counts ((x >> 8) + x) and mask to 4 bits.
2419 MachineFunction &MF = DAG.getMachineFunction();
2420 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2422 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2424 SDOperand N = Op.getOperand(0);
2425 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2426 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2427 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2429 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2430 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2432 // CNTB_result becomes the chain to which all of the virtual registers
2433 // CNTB_reg, SUM1_reg become associated:
2434 SDOperand CNTB_result =
2435 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2437 SDOperand CNTB_rescopy =
2438 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2440 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2442 return DAG.getNode(ISD::AND, MVT::i16,
2443 DAG.getNode(ISD::ADD, MVT::i16,
2444 DAG.getNode(ISD::SRL, MVT::i16,
// i32 arm: two rounds of shift-and-add fold four byte counts, then mask.
2451 MachineFunction &MF = DAG.getMachineFunction();
2452 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2454 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2455 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2457 SDOperand N = Op.getOperand(0);
2458 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2459 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2460 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2461 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2463 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2464 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2466 // CNTB_result becomes the chain to which all of the virtual registers
2467 // CNTB_reg, SUM1_reg become associated:
2468 SDOperand CNTB_result =
2469 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2471 SDOperand CNTB_rescopy =
2472 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2475 DAG.getNode(ISD::SRL, MVT::i32,
2476 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2479 DAG.getNode(ISD::ADD, MVT::i32,
2480 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2482 SDOperand Sum1_rescopy =
2483 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2486 DAG.getNode(ISD::SRL, MVT::i32,
2487 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2490 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2491 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2493 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2503 /// LowerOperation - Provide custom lowering hooks for some operations.
// Central dispatcher: routes each custom-lowered ISD opcode to the static
// Lower* helper defined above in this file.  The default path prints a
// diagnostic for opcodes that reached here without a handler.
// NOTE(review): many case labels are missing from this extract; each return
// is annotated with the opcode group it visibly belongs to.
2506 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2508 unsigned Opc = (unsigned) Op.getOpcode();
2509 unsigned VT = (unsigned) Op.getValueType();
// Unhandled opcode: dump diagnostics (and presumably abort below).
2513 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2514 cerr << "Op.getOpcode() = " << Opc << "\n";
2515 cerr << "*Op.Val:\n";
2522 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2524 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2525 case ISD::ConstantPool:
2526 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2527 case ISD::GlobalAddress:
2528 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2529 case ISD::JumpTable:
2530 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2532 return LowerConstant(Op, DAG);
2533 case ISD::ConstantFP:
2534 return LowerConstantFP(Op, DAG);
2536 return LowerBRCOND(Op, DAG);
2537 case ISD::FORMAL_ARGUMENTS:
2538 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2540 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2542 return LowerRET(Op, DAG, getTargetMachine());
2545 // i8, i64 math ops:
2546 case ISD::ZERO_EXTEND:
2547 case ISD::SIGN_EXTEND:
2548 case ISD::ANY_EXTEND:
// i8 goes through the i16-promotion helper, i64 through the vector helper.
2556 return LowerI8Math(Op, DAG, Opc);
2557 else if (VT == MVT::i64)
2558 return LowerI64Math(Op, DAG, Opc);
2561 // Vector-related lowering.
2562 case ISD::BUILD_VECTOR:
2563 return LowerBUILD_VECTOR(Op, DAG);
2564 case ISD::SCALAR_TO_VECTOR:
2565 return LowerSCALAR_TO_VECTOR(Op, DAG);
2566 case ISD::VECTOR_SHUFFLE:
2567 return LowerVECTOR_SHUFFLE(Op, DAG);
2568 case ISD::EXTRACT_VECTOR_ELT:
2569 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2570 case ISD::INSERT_VECTOR_ELT:
2571 return LowerINSERT_VECTOR_ELT(Op, DAG);
2573 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2577 return LowerByteImmed(Op, DAG);
2579 // Vector and i8 multiply:
2581 if (MVT::isVector(VT))
2582 return LowerVectorMUL(Op, DAG);
2583 else if (VT == MVT::i8)
2584 return LowerI8Math(Op, DAG, Opc);
2586 return LowerMUL(Op, DAG, VT, Opc);
2589 if (VT == MVT::f32 || VT == MVT::v4f32)
2590 return LowerFDIVf32(Op, DAG);
2591 // else if (Op.getValueType() == MVT::f64)
2592 // return LowerFDIVf64(Op, DAG);
2594 assert(0 && "Calling FDIV on unsupported MVT");
2597 return LowerCTPOP(Op, DAG);
2603 //===----------------------------------------------------------------------===//
2604 // Target Optimization Hooks
2605 //===----------------------------------------------------------------------===//
// Target-specific DAG combines.  Visible rewrites:
//  - fold (add <const>, (SPUindirect <arg>, <const>)) in either operand
//    order into (SPUindirect <arg>, <const+const>);
//  - drop a no-op extend of SPUISD::EXTRACT_ELT0 when the types match;
//  - collapse (SPUindirect (SPUaform <addr>, 0), 0) to the AFormAddr when
//    not using large memory;
//  - kill degenerate (shift-by-zero) SPU vector shifts/rotates;
//  - simplify PROMOTE_SCALAR of extends / EXTRACT_ELT0 round-trips.
// `Result` stays null when no rewrite applies; when set, the replacement is
// logged under DEBUG and returned.
// NOTE(review): this extract is missing interior lines (some returns,
// braces, and debug statements); comments describe only the visible code.
2608 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2611 TargetMachine &TM = getTargetMachine();
2613 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2614 SelectionDAG &DAG = DCI.DAG;
2615 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2616 SDOperand Result; // Initially, NULL result
2618 switch (N->getOpcode()) {
2621 SDOperand Op1 = N->getOperand(1);
2623 if ((Op1.getOpcode() == ISD::Constant
2624 || Op1.getOpcode() == ISD::TargetConstant)
2625 && Op0.getOpcode() == SPUISD::IndirectAddr) {
2626 SDOperand Op01 = Op0.getOperand(1);
2627 if (Op01.getOpcode() == ISD::Constant
2628 || Op01.getOpcode() == ISD::TargetConstant) {
2629 // (add <const>, (SPUindirect <arg>, <const>)) ->
2630 // (SPUindirect <arg>, <const + const>)
2631 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2632 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2633 SDOperand combinedConst =
2634 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2635 Op0.getValueType());
2637 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2638 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2639 DEBUG(cerr << "With: (SPUindirect <arg>, "
2640 << CN0->getValue() + CN1->getValue() << ")\n");
2641 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2642 Op0.getOperand(0), combinedConst);
// Mirror image of the fold above, with the IndirectAddr in operand 1.
2644 } else if ((Op0.getOpcode() == ISD::Constant
2645 || Op0.getOpcode() == ISD::TargetConstant)
2646 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2647 SDOperand Op11 = Op1.getOperand(1);
2648 if (Op11.getOpcode() == ISD::Constant
2649 || Op11.getOpcode() == ISD::TargetConstant) {
2650 // (add (SPUindirect <arg>, <const>), <const>) ->
2651 // (SPUindirect <arg>, <const + const>)
2652 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2653 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2654 SDOperand combinedConst =
2655 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2656 Op0.getValueType());
2658 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2659 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2660 DEBUG(cerr << "With: (SPUindirect <arg>, "
2661 << CN0->getValue() + CN1->getValue() << ")\n");
2663 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2664 Op1.getOperand(0), combinedConst);
2669 case ISD::SIGN_EXTEND:
2670 case ISD::ZERO_EXTEND:
2671 case ISD::ANY_EXTEND: {
2672 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2673 N->getValueType(0) == Op0.getValueType()) {
2674 // (any_extend (SPUextract_elt0 <arg>)) ->
2675 // (SPUextract_elt0 <arg>)
2676 // Types must match, however...
2677 DEBUG(cerr << "Replace: ");
2678 DEBUG(N->dump(&DAG));
2679 DEBUG(cerr << "\nWith: ");
2680 DEBUG(Op0.Val->dump(&DAG));
2681 DEBUG(cerr << "\n");
2687 case SPUISD::IndirectAddr: {
2688 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2689 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2690 if (CN->getValue() == 0) {
2691 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2692 // (SPUaform <addr>, 0)
2694 DEBUG(cerr << "Replace: ");
2695 DEBUG(N->dump(&DAG));
2696 DEBUG(cerr << "\nWith: ");
2697 DEBUG(Op0.Val->dump(&DAG));
2698 DEBUG(cerr << "\n");
2705 case SPUISD::SHLQUAD_L_BITS:
2706 case SPUISD::SHLQUAD_L_BYTES:
2707 case SPUISD::VEC_SHL:
2708 case SPUISD::VEC_SRL:
2709 case SPUISD::VEC_SRA:
2710 case SPUISD::ROTQUAD_RZ_BYTES:
2711 case SPUISD::ROTQUAD_RZ_BITS: {
2712 SDOperand Op1 = N->getOperand(1);
2714 if (isa<ConstantSDNode>(Op1)) {
2715 // Kill degenerate vector shifts:
2716 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2718 if (CN->getValue() == 0) {
2724 case SPUISD::PROMOTE_SCALAR: {
2725 switch (Op0.getOpcode()) {
2728 case ISD::ANY_EXTEND:
2729 case ISD::ZERO_EXTEND:
2730 case ISD::SIGN_EXTEND: {
2731 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2733 // but only if the SPUpromote_scalar and <arg> types match.
2734 SDOperand Op00 = Op0.getOperand(0);
2735 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2736 SDOperand Op000 = Op00.getOperand(0);
2737 if (Op000.getValueType() == N->getValueType(0)) {
2743 case SPUISD::EXTRACT_ELT0: {
2744 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2746 Result = Op0.getOperand(0);
2753 // Otherwise, return unchanged.
2756 DEBUG(cerr << "\nReplace.SPU: ");
2757 DEBUG(N->dump(&DAG));
2758 DEBUG(cerr << "\nWith: ");
2759 DEBUG(Result.Val->dump(&DAG));
2760 DEBUG(cerr << "\n");
2767 //===----------------------------------------------------------------------===//
2768 // Inline Assembly Support
2769 //===----------------------------------------------------------------------===//
2771 /// getConstraintType - Given a constraint letter, return the type of
2772 /// constraint it is for this target.
// Single-letter constraints (letters elided from this extract) map to
// C_RegisterClass; everything else defers to the base TargetLowering.
2773 SPUTargetLowering::ConstraintType
2774 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2775 if (ConstraintLetter.size() == 1) {
2776 switch (ConstraintLetter[0]) {
2783 return C_RegisterClass;
2786 return TargetLowering::getConstraintType(ConstraintLetter);
// Map single-letter inline-asm constraints to SPU register classes.
// Visible mappings: a 64-bit vs 32-bit integer class choice, f32 -> R32FP,
// f64 -> R64FP, and a general-purpose fallback; unknown constraints defer
// to the base class.
// NOTE(review): the case labels and the VT conditions guarding the first
// two returns are missing from this extract.
2789 std::pair<unsigned, const TargetRegisterClass*>
2790 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2791 MVT::ValueType VT) const
2793 if (Constraint.size() == 1) {
2794 // GCC RS6000 Constraint Letters
2795 switch (Constraint[0]) {
2799 return std::make_pair(0U, SPU::R64CRegisterClass);
2800 return std::make_pair(0U, SPU::R32CRegisterClass);
2803 return std::make_pair(0U, SPU::R32FPRegisterClass);
2804 else if (VT == MVT::f64)
2805 return std::make_pair(0U, SPU::R64FPRegisterClass);
2808 return std::make_pair(0U, SPU::GPRCRegisterClass);
2812 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2815 //! Compute used/known bits for a SPU operand
// For nodes that produce a value narrower than the 128-bit register
// (PROMOTE_SCALAR, LDRESULT, EXTRACT_ELT0[_CHAINED]), report the bits
// outside the scalar type's mask as known-zero.
// NOTE(review): `KnownOne |= InMask` claims every in-range bit is known to
// be ONE, which cannot be right for arbitrary values -- verify against the
// computeMaskedBits contract; likely only the KnownZero update is intended.
2817 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2821 const SelectionDAG &DAG,
2822 unsigned Depth ) const {
2823 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2825 switch (Op.getOpcode()) {
2827 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2837 case SPUISD::PROMOTE_SCALAR: {
2838 SDOperand Op0 = Op.getOperand(0);
2839 uint64_t InMask = MVT::getIntVTBitMask(Op0.getValueType());
2840 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
2841 KnownOne |= APInt(uint64_sizebits, InMask, false);
2845 case SPUISD::LDRESULT:
2846 case SPUISD::EXTRACT_ELT0:
2847 case SPUISD::EXTRACT_ELT0_CHAINED: {
2848 uint64_t InMask = MVT::getIntVTBitMask(Op.getValueType());
2849 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
2850 KnownOne |= APInt(uint64_sizebits, InMask, false);
// Remaining SPU opcodes (listed without bodies here): nothing is asserted
// about their known bits in the visible code.
2855 case EXTRACT_I1_ZEXT:
2856 case EXTRACT_I1_SEXT:
2857 case EXTRACT_I8_ZEXT:
2858 case EXTRACT_I8_SEXT:
2863 case SHLQUAD_L_BITS:
2864 case SHLQUAD_L_BYTES:
2870 case ROTQUAD_RZ_BYTES:
2871 case ROTQUAD_RZ_BITS:
2872 case ROTBYTES_RIGHT_S:
2874 case ROTBYTES_LEFT_CHAINED:
2885 // LowerAsmOperandForConstraint
// No SPU-specific inline-asm operand lowering yet: forward directly to the
// generic TargetLowering implementation.
2887 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2888 char ConstraintLetter,
2889 std::vector<SDOperand> &Ops,
2890 SelectionDAG &DAG) {
2891 // Default, for the time being, to the base class handler
2892 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2895 /// isLegalAddressImmediate - Return true if the integer value can be used
2896 /// as the offset of the target addressing mode.
// Accepts offsets strictly inside (-(1<<18), (1<<18)-1), i.e. roughly
// +/-256K.  NOTE(review): the range is asymmetric and excludes both
// endpoints (V == -(1<<18) and V == (1<<18)-1 are rejected) -- confirm this
// matches the d-form/x-form offset encoding intended by the "256K" comment.
2897 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2898 // SPU's addresses are 256K:
2899 return (V > -(1 << 18) && V < (1 << 18) - 1);
2902 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {