1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 // NOTE: i8 register class is not registered because we cannot determine when
126 // we need to zero or sign extend for custom-lowered loads and stores.
127 // NOTE: Ignore the previous note. For now. :-)
128 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
129 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
130 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
131 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
132 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
133 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
134 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
140 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
142 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
143 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
144 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
146 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
147 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
148 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
149 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
151 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
152 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
153 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
155 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
156 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
157 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
159 // SPU constant load actions are custom lowered:
160 setOperationAction(ISD::Constant, MVT::i64, Custom);
161 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
162 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
164 // SPU's loads and stores have to be custom lowered:
165 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
167 setOperationAction(ISD::LOAD, sctype, Custom);
168 setOperationAction(ISD::STORE, sctype, Custom);
171 // Custom lower BRCOND for i1, i8 to "promote" the result to
172 // i32 and i16, respectively.
173 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175 // Expand the jumptable branches
176 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
177 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
180 // SPU has no intrinsics for these particular operations:
181 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
182 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
183 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
184 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
186 // PowerPC has no SREM/UREM instructions
187 setOperationAction(ISD::SREM, MVT::i32, Expand);
188 setOperationAction(ISD::UREM, MVT::i32, Expand);
189 setOperationAction(ISD::SREM, MVT::i64, Expand);
190 setOperationAction(ISD::UREM, MVT::i64, Expand);
192 // We don't support sin/cos/sqrt/fmod
193 setOperationAction(ISD::FSIN , MVT::f64, Expand);
194 setOperationAction(ISD::FCOS , MVT::f64, Expand);
195 setOperationAction(ISD::FREM , MVT::f64, Expand);
196 setOperationAction(ISD::FSIN , MVT::f32, Expand);
197 setOperationAction(ISD::FCOS , MVT::f32, Expand);
198 setOperationAction(ISD::FREM , MVT::f32, Expand);
200 // If we're enabling GP optimizations, use hardware square root
201 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
202 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
205 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
207 // SPU can do rotate right and left, so legalize it... but customize for i8
208 // because instructions don't exist.
209 setOperationAction(ISD::ROTR, MVT::i32, Legal);
210 setOperationAction(ISD::ROTR, MVT::i16, Legal);
211 setOperationAction(ISD::ROTR, MVT::i8, Custom);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // And SPU needs custom lowering for shift left/right for i64
220 setOperationAction(ISD::SHL, MVT::i64, Custom);
221 setOperationAction(ISD::SRL, MVT::i64, Custom);
222 setOperationAction(ISD::SRA, MVT::i64, Custom);
224 // Custom lower i32 multiplications
225 setOperationAction(ISD::MUL, MVT::i32, Custom);
227 // Need to custom handle (some) common i8 math ops
228 setOperationAction(ISD::SUB, MVT::i8, Custom);
229 setOperationAction(ISD::MUL, MVT::i8, Custom);
231 // SPU does not have BSWAP. It does have i32 support CTLZ.
232 // CTPOP has to be custom lowered.
233 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
234 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
236 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
237 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
238 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
239 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
241 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
242 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
244 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
246 // SPU has a version of select
247 setOperationAction(ISD::SELECT, MVT::i1, Promote);
248 setOperationAction(ISD::SELECT, MVT::i8, Legal);
249 setOperationAction(ISD::SELECT, MVT::i16, Legal);
250 setOperationAction(ISD::SELECT, MVT::i32, Legal);
251 setOperationAction(ISD::SELECT, MVT::i64, Expand);
252 setOperationAction(ISD::SELECT, MVT::f32, Expand);
253 setOperationAction(ISD::SELECT, MVT::f64, Expand);
255 setOperationAction(ISD::SETCC, MVT::i1, Promote);
256 setOperationAction(ISD::SETCC, MVT::i8, Legal);
257 setOperationAction(ISD::SETCC, MVT::i16, Legal);
258 setOperationAction(ISD::SETCC, MVT::i32, Legal);
259 setOperationAction(ISD::SETCC, MVT::i64, Expand);
260 setOperationAction(ISD::SETCC, MVT::f32, Expand);
261 setOperationAction(ISD::SETCC, MVT::f64, Expand);
263 // Zero extension and sign extension for i64 have to be
265 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
266 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
267 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
269 // SPU has a legal FP -> signed INT instruction
270 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
273 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
275 // FDIV on SPU requires custom lowering
276 setOperationAction(ISD::FDIV, MVT::f32, Custom);
277 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
279 // SPU has [U|S]INT_TO_FP
280 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
284 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
285 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
286 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
289 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
290 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
291 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
292 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
294 // We cannot sextinreg(i1). Expand to shifts.
295 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
297 // Support label based line numbers.
298 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
299 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
301 // We want to legalize GlobalAddress and ConstantPool nodes into the
302 // appropriate instructions to materialize the address.
303 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
305 setOperationAction(ISD::GlobalAddress, sctype, Custom);
306 setOperationAction(ISD::ConstantPool, sctype, Custom);
307 setOperationAction(ISD::JumpTable, sctype, Custom);
310 // RET must be custom lowered, to meet ABI requirements
311 setOperationAction(ISD::RET, MVT::Other, Custom);
313 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
314 setOperationAction(ISD::VASTART , MVT::Other, Custom);
316 // Use the default implementation.
317 setOperationAction(ISD::VAARG , MVT::Other, Expand);
318 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
319 setOperationAction(ISD::VAEND , MVT::Other, Expand);
320 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
321 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
322 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
323 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
324 setOperationAction(ISD::PREFETCH , MVT::Other, Expand);
326 // Cell SPU has instructions for converting between i64 and fp.
327 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
328 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
330 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
331 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
333 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
334 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
336 // First set operation action for all vector types to expand. Then we
337 // will selectively turn on ones that can be effectively codegen'd.
338 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
339 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
341 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
342 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
345 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
346 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
347 // add/sub are legal for all supported vector VT's.
348 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
349 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
350 // mul has to be custom lowered.
351 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
353 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
354 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
355 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
356 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
357 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
358 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
360 // These operations need to be expanded:
361 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
362 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
363 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
364 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
365 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
367 // Custom lower build_vector, constant pool spills, insert and
368 // extract vector elements:
369 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
370 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
371 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
372 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
373 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
374 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
377 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
378 setOperationAction(ISD::AND, MVT::v16i8, Custom);
379 setOperationAction(ISD::OR, MVT::v16i8, Custom);
380 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
381 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
383 setShiftAmountType(MVT::i32);
384 setSetCCResultContents(ZeroOrOneSetCCResult);
386 setStackPointerRegisterToSaveRestore(SPU::R1);
388 // We have target-specific dag combine patterns for the following nodes:
389 setTargetDAGCombine(ISD::ADD);
390 setTargetDAGCombine(ISD::ZERO_EXTEND);
391 setTargetDAGCombine(ISD::SIGN_EXTEND);
392 setTargetDAGCombine(ISD::ANY_EXTEND);
394 computeRegisterProperties();
398 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
400 if (node_names.empty()) {
401 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
402 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
403 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
404 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
405 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
406 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
407 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
408 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
409 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
410 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
411 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
412 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
413 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
414 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
415 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
416 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
417 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
418 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
419 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
420 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
421 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
422 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
423 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
424 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
425 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
426 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
427 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
428 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
429 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
430 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
431 "SPUISD::ROTQUAD_RZ_BYTES";
432 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
433 "SPUISD::ROTQUAD_RZ_BITS";
434 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
435 "SPUISD::ROTBYTES_RIGHT_S";
436 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
437 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
438 "SPUISD::ROTBYTES_LEFT_CHAINED";
439 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
440 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
441 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
442 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
443 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
446 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
448 return ((i != node_names.end()) ? i->second : 0);
452 SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
453 return Op.getValueType();
456 //===----------------------------------------------------------------------===//
457 // Calling convention code:
458 //===----------------------------------------------------------------------===//
460 #include "SPUGenCallingConv.inc"
462 //===----------------------------------------------------------------------===//
463 // LowerOperation implementation
464 //===----------------------------------------------------------------------===//
466 /// Aligned load common code for CellSPU
468 \param[in] Op The SelectionDAG load or store operand
469 \param[in] DAG The selection DAG
470 \param[in] ST CellSPU subtarget information structure
471 \param[in,out] alignment Caller initializes this to the load or store node's
472 value from getAlignment(), may be updated while generating the aligned load
473 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
474 offset (divisible by 16, modulo 16 == 0)
475 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
476 offset of the preferred slot (modulo 16 != 0)
477 \param[in,out] VT Caller initializes this value type to the the load or store
478 node's loaded or stored value type; may be updated if an i1-extended load or
480 \param[out] was16aligned true if the base pointer had 16-byte alignment,
481 otherwise false. Can help to determine if the chunk needs to be rotated.
483 Both load and store lowering load a block of data aligned on a 16-byte
484 boundary. This is the common aligned load code shared between both.
487 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
489 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
490 MVT::ValueType &VT, bool &was16aligned)
492 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
493 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
494 SDOperand basePtr = LSN->getBasePtr();
495 SDOperand chain = LSN->getChain();
497 if (basePtr.getOpcode() == ISD::ADD) {
498 SDOperand Op1 = basePtr.Val->getOperand(1);
500 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
501 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
503 alignOffs = (int) CN->getValue();
504 prefSlotOffs = (int) (alignOffs & 0xf);
506 // Adjust the rotation amount to ensure that the final result ends up in
507 // the preferred slot:
508 prefSlotOffs -= vtm->prefslot_byte;
509 basePtr = basePtr.getOperand(0);
511 // Loading from memory, can we adjust alignment?
512 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
513 SDOperand APtr = basePtr.getOperand(0);
514 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
515 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
516 alignment = GSDN->getGlobal()->getAlignment();
521 prefSlotOffs = -vtm->prefslot_byte;
525 prefSlotOffs = -vtm->prefslot_byte;
528 if (alignment == 16) {
529 // Realign the base pointer as a D-Form address:
530 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
531 basePtr = DAG.getNode(ISD::ADD, PtrVT,
533 DAG.getConstant((alignOffs & ~0xf), PtrVT));
536 // Emit the vector load:
538 return DAG.getLoad(MVT::v16i8, chain, basePtr,
539 LSN->getSrcValue(), LSN->getSrcValueOffset(),
540 LSN->isVolatile(), 16);
543 // Unaligned load or we're using the "large memory" model, which means that
544 // we have to be very pessimistic:
545 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
546 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
550 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
551 DAG.getConstant((alignOffs & ~0xf), PtrVT));
552 was16aligned = false;
553 return DAG.getLoad(MVT::v16i8, chain, basePtr,
554 LSN->getSrcValue(), LSN->getSrcValueOffset(),
555 LSN->isVolatile(), 16);
558 /// Custom lower loads for CellSPU
560 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
561 within a 16-byte block, we have to rotate to extract the requested element.
564 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
565 LoadSDNode *LN = cast<LoadSDNode>(Op);
566 SDOperand the_chain = LN->getChain();
567 MVT::ValueType VT = LN->getMemoryVT();
568 MVT::ValueType OpVT = Op.Val->getValueType(0);
569 ISD::LoadExtType ExtType = LN->getExtensionType();
570 unsigned alignment = LN->getAlignment();
573 switch (LN->getAddressingMode()) {
574 case ISD::UNINDEXED: {
578 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
583 the_chain = result.getValue(1);
584 // Rotate the chunk if necessary
587 if (rotamt != 0 || !was16aligned) {
588 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
593 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
595 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
596 LoadSDNode *LN1 = cast<LoadSDNode>(result);
597 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
598 DAG.getConstant(rotamt, PtrVT));
601 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
602 the_chain = result.getValue(1);
605 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
607 MVT::ValueType vecVT = MVT::v16i8;
609 // Convert the loaded v16i8 vector to the appropriate vector type
610 // specified by the operand:
613 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
615 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
618 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
619 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
620 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
621 the_chain = result.getValue(1);
623 // Handle the sign and zero-extending loads for i1 and i8:
626 if (ExtType == ISD::SEXTLOAD) {
627 NewOpC = (OpVT == MVT::i1
628 ? SPUISD::EXTRACT_I1_SEXT
629 : SPUISD::EXTRACT_I8_SEXT);
631 assert(ExtType == ISD::ZEXTLOAD);
632 NewOpC = (OpVT == MVT::i1
633 ? SPUISD::EXTRACT_I1_ZEXT
634 : SPUISD::EXTRACT_I8_ZEXT);
637 result = DAG.getNode(NewOpC, OpVT, result);
640 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
641 SDOperand retops[2] = {
646 result = DAG.getNode(SPUISD::LDRESULT, retvts,
647 retops, sizeof(retops) / sizeof(retops[0]));
654 case ISD::LAST_INDEXED_MODE:
655 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
657 cerr << (unsigned) LN->getAddressingMode() << "\n";
665 /// Custom lower stores for CellSPU
667 All CellSPU stores are aligned to 16-byte boundaries, so for elements
668 within a 16-byte block, we have to generate a shuffle to insert the
669 requested element into its place, then store the resulting block.
672 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
673 StoreSDNode *SN = cast<StoreSDNode>(Op);
674 SDOperand Value = SN->getValue();
675 MVT::ValueType VT = Value.getValueType();
676 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
677 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
678 unsigned alignment = SN->getAlignment();
680 switch (SN->getAddressingMode()) {
681 case ISD::UNINDEXED: {
682 int chunk_offset, slot_offset;
685 // The vector type we really want to load from the 16-byte chunk, except
686 // in the case of MVT::i1, which has to be v16i8.
687 unsigned vecVT, stVecVT = MVT::v16i8;
690 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
691 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
693 SDOperand alignLoadVec =
694 AlignedLoad(Op, DAG, ST, SN, alignment,
695 chunk_offset, slot_offset, VT, was16aligned);
697 if (alignLoadVec.Val == 0)
700 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
701 SDOperand basePtr = LN->getBasePtr();
702 SDOperand the_chain = alignLoadVec.getValue(1);
703 SDOperand theValue = SN->getValue();
707 && (theValue.getOpcode() == ISD::AssertZext
708 || theValue.getOpcode() == ISD::AssertSext)) {
709 // Drill down and get the value for zero- and sign-extended
711 theValue = theValue.getOperand(0);
716 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
717 SDOperand insertEltPtr;
718 SDOperand insertEltOp;
720 // If the base pointer is already a D-form address, then just create
721 // a new D-form address with a slot offset and the orignal base pointer.
722 // Otherwise generate a D-form address with the slot offset relative
723 // to the stack pointer, which is always aligned.
724 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
725 DEBUG(basePtr.Val->dump(&DAG));
728 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
729 (basePtr.getOpcode() == ISD::ADD
730 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
731 insertEltPtr = basePtr;
733 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
736 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
737 result = DAG.getNode(SPUISD::SHUFB, vecVT,
738 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
740 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
742 result = DAG.getStore(the_chain, result, basePtr,
743 LN->getSrcValue(), LN->getSrcValueOffset(),
744 LN->isVolatile(), LN->getAlignment());
753 case ISD::LAST_INDEXED_MODE:
754 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
756 cerr << (unsigned) SN->getAddressingMode() << "\n";
764 /// Generate the address of a constant pool entry.
766 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
767 MVT::ValueType PtrVT = Op.getValueType();
768 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
769 Constant *C = CP->getConstVal();
770 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
771 SDOperand Zero = DAG.getConstant(0, PtrVT);
772 const TargetMachine &TM = DAG.getTarget();
774 if (TM.getRelocationModel() == Reloc::Static) {
775 if (!ST->usingLargeMem()) {
776 // Just return the SDOperand with the constant pool address in it.
777 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
779 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
780 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
781 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
786 "LowerConstantPool: Relocation model other than static not supported.");
791 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
792 MVT::ValueType PtrVT = Op.getValueType();
793 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
794 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
795 SDOperand Zero = DAG.getConstant(0, PtrVT);
796 const TargetMachine &TM = DAG.getTarget();
798 if (TM.getRelocationModel() == Reloc::Static) {
799 if (!ST->usingLargeMem()) {
800 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
802 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
803 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
804 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
809 "LowerJumpTable: Relocation model other than static not supported.");
814 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
815 MVT::ValueType PtrVT = Op.getValueType();
816 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
817 GlobalValue *GV = GSDN->getGlobal();
818 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
819 const TargetMachine &TM = DAG.getTarget();
820 SDOperand Zero = DAG.getConstant(0, PtrVT);
822 if (TM.getRelocationModel() == Reloc::Static) {
823 if (!ST->usingLargeMem()) {
824 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
826 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
827 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
828 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
831 cerr << "LowerGlobalAddress: Relocation model other than static not "
840 //! Custom lower i64 integer constants
842 This code inserts all of the necessary juggling that needs to occur to load
843 a 64-bit constant into a register.
846 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
847 unsigned VT = Op.getValueType();
848 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
850 if (VT == MVT::i64) {
851 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
852 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
853 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
855 cerr << "LowerConstant: unhandled constant type "
856 << MVT::getValueTypeString(VT)
865 //! Custom lower double precision floating point constants
867 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
868 unsigned VT = Op.getValueType();
869 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
872 "LowerConstantFP: Node is not ConstantFPSDNode");
874 if (VT == MVT::f64) {
875 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
876 return DAG.getNode(ISD::BIT_CONVERT, VT,
877 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
883 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
885 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
887 SDOperand Cond = Op.getOperand(1);
888 MVT::ValueType CondVT = Cond.getValueType();
889 MVT::ValueType CondNVT;
891 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
892 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
893 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
895 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
898 return SDOperand(); // Unchanged
902 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
904 MachineFunction &MF = DAG.getMachineFunction();
905 MachineFrameInfo *MFI = MF.getFrameInfo();
906 MachineRegisterInfo &RegInfo = MF.getRegInfo();
907 SmallVector<SDOperand, 8> ArgValues;
908 SDOperand Root = Op.getOperand(0);
909 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
911 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
912 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
914 unsigned ArgOffset = SPUFrameInfo::minStackSize();
915 unsigned ArgRegIdx = 0;
916 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
918 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
920 // Add DAG nodes to load the arguments or copy them out of registers.
921 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
923 bool needsLoad = false;
924 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
925 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
929 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
930 << MVT::getValueTypeString(ObjectVT)
935 if (!isVarArg && ArgRegIdx < NumArgRegs) {
936 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
937 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
938 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
945 if (!isVarArg && ArgRegIdx < NumArgRegs) {
946 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
947 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
948 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
955 if (!isVarArg && ArgRegIdx < NumArgRegs) {
956 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
957 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
958 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
965 if (!isVarArg && ArgRegIdx < NumArgRegs) {
966 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
967 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
968 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
975 if (!isVarArg && ArgRegIdx < NumArgRegs) {
976 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
977 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
978 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
985 if (!isVarArg && ArgRegIdx < NumArgRegs) {
986 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
987 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
988 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1000 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1001 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1002 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1003 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1011 // We need to load the argument to a virtual register if we determined above
1012 // that we ran out of physical registers of the appropriate type
1014 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1015 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1016 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1017 ArgOffset += StackSlotSize;
1020 ArgValues.push_back(ArgVal);
1023 // If the function takes variable number of arguments, make a frame index for
1024 // the start of the first vararg value... for expansion of llvm.va_start.
1026 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1028 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1029 // If this function is vararg, store any remaining integer argument regs to
1030 // their spots on the stack so that they may be loaded by deferencing the
1031 // result of va_next.
1032 SmallVector<SDOperand, 8> MemOps;
1033 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1034 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1035 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1036 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1037 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1038 MemOps.push_back(Store);
1039 // Increment the address by four for the next argument to store
1040 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1041 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1043 if (!MemOps.empty())
1044 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1047 ArgValues.push_back(Root);
1049 // Return the new list of results.
1050 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1051 Op.Val->value_end());
1052 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1055 /// isLSAAddress - Return the immediate to use if the specified
1056 /// value is representable as a LSA address.
1057 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1058 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1061 int Addr = C->getValue();
1062 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1063 (Addr << 14 >> 14) != Addr)
1064 return 0; // Top 14 bits have to be sext of immediate.
1066 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1071 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1072 SDOperand Chain = Op.getOperand(0);
1074 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1075 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1077 SDOperand Callee = Op.getOperand(4);
1078 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1079 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1080 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1081 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1083 // Handy pointer type
1084 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1086 // Accumulate how many bytes are to be pushed on the stack, including the
1087 // linkage area, and parameter passing area. According to the SPU ABI,
1088 // we minimally need space for [LR] and [SP]
1089 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1091 // Set up a copy of the stack pointer for use loading and storing any
1092 // arguments that may not fit in the registers available for argument
1094 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1096 // Figure out which arguments are going to go in registers, and which in
1098 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1099 unsigned ArgRegIdx = 0;
1101 // Keep track of registers passing arguments
1102 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1103 // And the arguments passed on the stack
1104 SmallVector<SDOperand, 8> MemOpChains;
1106 for (unsigned i = 0; i != NumOps; ++i) {
1107 SDOperand Arg = Op.getOperand(5+2*i);
1109 // PtrOff will be used to store the current argument to the stack if a
1110 // register cannot be found for it.
1111 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1112 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1114 switch (Arg.getValueType()) {
1115 default: assert(0 && "Unexpected ValueType for argument!");
1119 if (ArgRegIdx != NumArgRegs) {
1120 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1122 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1123 ArgOffset += StackSlotSize;
1128 if (ArgRegIdx != NumArgRegs) {
1129 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1131 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1132 ArgOffset += StackSlotSize;
1139 if (ArgRegIdx != NumArgRegs) {
1140 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1142 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1143 ArgOffset += StackSlotSize;
1149 // Update number of stack bytes actually used, insert a call sequence start
1150 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1151 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1153 if (!MemOpChains.empty()) {
1154 // Adjust the stack pointer for the stack arguments.
1155 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1156 &MemOpChains[0], MemOpChains.size());
1159 // Build a sequence of copy-to-reg nodes chained together with token chain
1160 // and flag operands which copy the outgoing args into the appropriate regs.
1162 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1163 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1165 InFlag = Chain.getValue(1);
1168 std::vector<MVT::ValueType> NodeTys;
1169 NodeTys.push_back(MVT::Other); // Returns a chain
1170 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1172 SmallVector<SDOperand, 8> Ops;
1173 unsigned CallOpc = SPUISD::CALL;
1175 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1176 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1177 // node so that legalize doesn't hack it.
1178 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1179 GlobalValue *GV = G->getGlobal();
1180 unsigned CalleeVT = Callee.getValueType();
1181 SDOperand Zero = DAG.getConstant(0, PtrVT);
1182 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1184 if (!ST->usingLargeMem()) {
1185 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1186 // style calls, otherwise, external symbols are BRASL calls. This assumes
1187 // that declared/defined symbols are in the same compilation unit and can
1188 // be reached through PC-relative jumps.
1191 // This may be an unsafe assumption for JIT and really large compilation
1193 if (GV->isDeclaration()) {
1194 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1196 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1199 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1201 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1203 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1204 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1205 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1206 // If this is an absolute destination address that appears to be a legal
1207 // local store address, use the munged value.
1208 Callee = SDOperand(Dest, 0);
1211 Ops.push_back(Chain);
1212 Ops.push_back(Callee);
1214 // Add argument registers to the end of the list so that they are known live
1216 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1217 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1218 RegsToPass[i].second.getValueType()));
1221 Ops.push_back(InFlag);
1222 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1223 InFlag = Chain.getValue(1);
1225 Chain = DAG.getCALLSEQ_END(Chain,
1226 DAG.getConstant(NumStackBytes, PtrVT),
1227 DAG.getConstant(0, PtrVT),
1229 if (Op.Val->getValueType(0) != MVT::Other)
1230 InFlag = Chain.getValue(1);
1232 SDOperand ResultVals[3];
1233 unsigned NumResults = 0;
1236 // If the call has results, copy the values out of the ret val registers.
1237 switch (Op.Val->getValueType(0)) {
1238 default: assert(0 && "Unexpected ret value!");
1239 case MVT::Other: break;
1241 if (Op.Val->getValueType(1) == MVT::i32) {
1242 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1243 ResultVals[0] = Chain.getValue(0);
1244 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1245 Chain.getValue(2)).getValue(1);
1246 ResultVals[1] = Chain.getValue(0);
1248 NodeTys.push_back(MVT::i32);
1250 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1251 ResultVals[0] = Chain.getValue(0);
1254 NodeTys.push_back(MVT::i32);
1257 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1258 ResultVals[0] = Chain.getValue(0);
1260 NodeTys.push_back(MVT::i64);
1264 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1265 InFlag).getValue(1);
1266 ResultVals[0] = Chain.getValue(0);
1268 NodeTys.push_back(Op.Val->getValueType(0));
1275 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1276 InFlag).getValue(1);
1277 ResultVals[0] = Chain.getValue(0);
1279 NodeTys.push_back(Op.Val->getValueType(0));
1283 NodeTys.push_back(MVT::Other);
1285 // If the function returns void, just return the chain.
1286 if (NumResults == 0)
1289 // Otherwise, merge everything together with a MERGE_VALUES node.
1290 ResultVals[NumResults++] = Chain;
1291 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1292 ResultVals, NumResults);
1293 return Res.getValue(Op.ResNo);
1297 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1298 SmallVector<CCValAssign, 16> RVLocs;
1299 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1300 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1301 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1302 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1304 // If this is the first return lowered for this function, add the regs to the
1305 // liveout set for the function.
1306 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1307 for (unsigned i = 0; i != RVLocs.size(); ++i)
1308 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1311 SDOperand Chain = Op.getOperand(0);
1314 // Copy the result values into the output registers.
1315 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1316 CCValAssign &VA = RVLocs[i];
1317 assert(VA.isRegLoc() && "Can only return in registers!");
1318 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1319 Flag = Chain.getValue(1);
1323 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1325 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1329 //===----------------------------------------------------------------------===//
1330 // Vector related lowering:
1331 //===----------------------------------------------------------------------===//
1333 static ConstantSDNode *
1334 getVecImm(SDNode *N) {
1335 SDOperand OpVal(0, 0);
1337 // Check to see if this buildvec has a single non-undef value in its elements.
1338 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1339 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1341 OpVal = N->getOperand(i);
1342 else if (OpVal != N->getOperand(i))
1346 if (OpVal.Val != 0) {
1347 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1352 return 0; // All UNDEF: use implicit def.; not Constant node
1355 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1356 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1358 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1359 MVT::ValueType ValueType) {
1360 if (ConstantSDNode *CN = getVecImm(N)) {
1361 uint64_t Value = CN->getValue();
1362 if (ValueType == MVT::i64) {
1363 uint64_t UValue = CN->getValue();
1364 uint32_t upper = uint32_t(UValue >> 32);
1365 uint32_t lower = uint32_t(UValue);
1368 Value = Value >> 32;
1370 if (Value <= 0x3ffff)
1371 return DAG.getConstant(Value, ValueType);
1377 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1378 /// and the value fits into a signed 16-bit constant, and if so, return the
1380 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1381 MVT::ValueType ValueType) {
1382 if (ConstantSDNode *CN = getVecImm(N)) {
1383 int64_t Value = CN->getSignExtended();
1384 if (ValueType == MVT::i64) {
1385 uint64_t UValue = CN->getValue();
1386 uint32_t upper = uint32_t(UValue >> 32);
1387 uint32_t lower = uint32_t(UValue);
1390 Value = Value >> 32;
1392 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1393 return DAG.getConstant(Value, ValueType);
1400 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1401 /// and the value fits into a signed 10-bit constant, and if so, return the
1403 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1404 MVT::ValueType ValueType) {
1405 if (ConstantSDNode *CN = getVecImm(N)) {
1406 int64_t Value = CN->getSignExtended();
1407 if (ValueType == MVT::i64) {
1408 uint64_t UValue = CN->getValue();
1409 uint32_t upper = uint32_t(UValue >> 32);
1410 uint32_t lower = uint32_t(UValue);
1413 Value = Value >> 32;
1415 if (isS10Constant(Value))
1416 return DAG.getConstant(Value, ValueType);
1422 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1423 /// and the value fits into a signed 8-bit constant, and if so, return the
1426 /// @note: The incoming vector is v16i8 because that's the only way we can load
1427 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1429 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1430 MVT::ValueType ValueType) {
1431 if (ConstantSDNode *CN = getVecImm(N)) {
1432 int Value = (int) CN->getValue();
1433 if (ValueType == MVT::i16
1434 && Value <= 0xffff /* truncated from uint64_t */
1435 && ((short) Value >> 8) == ((short) Value & 0xff))
1436 return DAG.getConstant(Value & 0xff, ValueType);
1437 else if (ValueType == MVT::i8
1438 && (Value & 0xff) == Value)
1439 return DAG.getConstant(Value, ValueType);
1445 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1446 /// and the value fits into a signed 16-bit constant, and if so, return the
1448 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1449 MVT::ValueType ValueType) {
1450 if (ConstantSDNode *CN = getVecImm(N)) {
1451 uint64_t Value = CN->getValue();
1452 if ((ValueType == MVT::i32
1453 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1454 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1455 return DAG.getConstant(Value >> 16, ValueType);
1461 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1462 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1463 if (ConstantSDNode *CN = getVecImm(N)) {
1464 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1470 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1471 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1472 if (ConstantSDNode *CN = getVecImm(N)) {
1473 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1479 // If this is a vector of constants or undefs, get the bits. A bit in
1480 // UndefBits is set if the corresponding element of the vector is an
1481 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1482 // zero. Return true if this is not an array of constants, false if it is.
1484 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1485 uint64_t UndefBits[2]) {
1486 // Start with zero'd results.
1487 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1489 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1490 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1491 SDOperand OpVal = BV->getOperand(i);
1493 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1494 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1496 uint64_t EltBits = 0;
1497 if (OpVal.getOpcode() == ISD::UNDEF) {
1498 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1499 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1501 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1502 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1503 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1504 const APFloat &apf = CN->getValueAPF();
1505 EltBits = (CN->getValueType(0) == MVT::f32
1506 ? FloatToBits(apf.convertToFloat())
1507 : DoubleToBits(apf.convertToDouble()));
1509 // Nonconstant element.
1513 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1516 //printf("%llx %llx %llx %llx\n",
1517 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1521 /// If this is a splat (repetition) of a value across the whole vector, return
1522 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1523 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1524 /// SplatSize = 1 byte.
1525 static bool isConstantSplat(const uint64_t Bits128[2],
1526 const uint64_t Undef128[2],
1528 uint64_t &SplatBits, uint64_t &SplatUndef,
1530 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1531 // the same as the lower 64-bits, ignoring undefs.
1532 uint64_t Bits64 = Bits128[0] | Bits128[1];
1533 uint64_t Undef64 = Undef128[0] & Undef128[1];
1534 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1535 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1536 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1537 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1539 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1540 if (MinSplatBits < 64) {
1542 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1544 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1545 if (MinSplatBits < 32) {
1547 // If the top 16-bits are different than the lower 16-bits, ignoring
1548 // undefs, we have an i32 splat.
1549 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1550 if (MinSplatBits < 16) {
1551 // If the top 8-bits are different than the lower 8-bits, ignoring
1552 // undefs, we have an i16 splat.
1553 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1554 // Otherwise, we have an 8-bit splat.
1555 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1556 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1562 SplatUndef = Undef16;
1569 SplatUndef = Undef32;
1575 SplatBits = Bits128[0];
1576 SplatUndef = Undef128[0];
1582 return false; // Can't be a splat if two pieces don't match.
1585 // If this is a case we can't handle, return null and let the default
1586 // expansion code take care of it. If we CAN select this case, and if it
1587 // selects to a single instruction, return Op. Otherwise, if we can codegen
1588 // this case more efficiently than a constant pool load, lower it to the
1589 // sequence of ops that should be used.
1590 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1591 MVT::ValueType VT = Op.getValueType();
1592 // If this is a vector of constants or undefs, get the bits. A bit in
1593 // UndefBits is set if the corresponding element of the vector is an
1594 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1596 uint64_t VectorBits[2];
1597 uint64_t UndefBits[2];
1598 uint64_t SplatBits, SplatUndef;
1600 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1601 || !isConstantSplat(VectorBits, UndefBits,
1602 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1603 SplatBits, SplatUndef, SplatSize))
1604 return SDOperand(); // Not a constant vector, not a splat.
1609 uint32_t Value32 = SplatBits;
1610 assert(SplatSize == 4
1611 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1612 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1613 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1614 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1615 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1619 uint64_t f64val = SplatBits;
1620 assert(SplatSize == 8
1621 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1622 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1623 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1624 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1625 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1629 // 8-bit constants have to be expanded to 16-bits
1630 unsigned short Value16 = SplatBits | (SplatBits << 8);
1632 for (int i = 0; i < 8; ++i)
1633 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1634 return DAG.getNode(ISD::BIT_CONVERT, VT,
1635 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1638 unsigned short Value16;
1640 Value16 = (unsigned short) (SplatBits & 0xffff);
1642 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1643 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1645 for (int i = 0; i < 8; ++i) Ops[i] = T;
1646 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1649 unsigned int Value = SplatBits;
1650 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1651 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1654 uint64_t val = SplatBits;
1655 uint32_t upper = uint32_t(val >> 32);
1656 uint32_t lower = uint32_t(val);
1658 if (upper == lower) {
1659 // Magic constant that can be matched by IL, ILA, et. al.
1660 SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1661 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1665 SmallVector<SDOperand, 16> ShufBytes;
1667 bool upper_special, lower_special;
1669 // NOTE: This code creates common-case shuffle masks that can be easily
1670 // detected as common expressions. It is not attempting to create highly
1671 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1673 // Detect if the upper or lower half is a special shuffle mask pattern:
1674 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1675 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1677 // Create lower vector if not a special pattern
1678 if (!lower_special) {
1679 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1680 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1681 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1682 LO32C, LO32C, LO32C, LO32C));
1685 // Create upper vector if not a special pattern
1686 if (!upper_special) {
1687 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1688 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1689 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1690 HI32C, HI32C, HI32C, HI32C));
1693 // If either upper or lower are special, then the two input operands are
1694 // the same (basically, one of them is a "don't care")
1699 if (lower_special && upper_special) {
1700 // Unhappy situation... both upper and lower are special, so punt with
1701 // a target constant:
1702 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1703 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1707 for (int i = 0; i < 4; ++i) {
1708 for (int j = 0; j < 4; ++j) {
1710 bool process_upper, process_lower;
1713 process_upper = (upper_special && (i & 1) == 0);
1714 process_lower = (lower_special && (i & 1) == 1);
1716 if (process_upper || process_lower) {
1717 if ((process_upper && upper == 0)
1718 || (process_lower && lower == 0))
1720 else if ((process_upper && upper == 0xffffffff)
1721 || (process_lower && lower == 0xffffffff))
1723 else if ((process_upper && upper == 0x80000000)
1724 || (process_lower && lower == 0x80000000))
1725 val = (j == 0 ? 0xe0 : 0x80);
1727 val = i * 4 + j + ((i & 1) * 16);
1729 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1733 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1734 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1735 &ShufBytes[0], ShufBytes.size()));
1743 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1744 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1745 /// permutation vector, V3, is monotonically increasing with one "exception"
1746 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1747 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1748 /// In either case, the net result is going to eventually invoke SHUFB to
1749 /// permute/shuffle the bytes from V1 and V2.
1751 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1752 /// control word for byte/halfword/word insertion. This takes care of a single
1753 /// element move from V2 into V1.
1755 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1756 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1757 SDOperand V1 = Op.getOperand(0);
1758 SDOperand V2 = Op.getOperand(1);
1759 SDOperand PermMask = Op.getOperand(2);
1761 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1763 // If we have a single element being moved from V1 to V2, this can be handled
1764 // using the C*[DX] compute mask instructions, but the vector elements have
1765 // to be monotonically increasing with one exception element.
1766 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1767 unsigned EltsFromV2 = 0;
1769 unsigned V2EltIdx0 = 0;
1770 unsigned CurrElt = 0;
1771 bool monotonic = true;
1772 if (EltVT == MVT::i8)
1774 else if (EltVT == MVT::i16)
1776 else if (EltVT == MVT::i32)
1779 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1781 for (unsigned i = 0, e = PermMask.getNumOperands();
1782 EltsFromV2 <= 1 && monotonic && i != e;
1785 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1788 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1790 if (SrcElt >= V2EltIdx0) {
1792 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1793 } else if (CurrElt != SrcElt) {
1800 if (EltsFromV2 == 1 && monotonic) {
1801 // Compute mask and shuffle
1802 MachineFunction &MF = DAG.getMachineFunction();
1803 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1804 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1805 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1806 // Initialize temporary register to 0
1807 SDOperand InitTempReg =
1808 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1809 // Copy register's contents as index in INSERT_MASK:
1810 SDOperand ShufMaskOp =
1811 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1812 DAG.getTargetConstant(V2Elt, MVT::i32),
1813 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1814 // Use shuffle mask in SHUFB synthetic instruction:
1815 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1817 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1818 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1820 SmallVector<SDOperand, 16> ResultMask;
1821 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1823 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1826 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1828 for (unsigned j = 0; j < BytesPerElement; ++j) {
1829 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1834 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1835 &ResultMask[0], ResultMask.size());
1836 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1840 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1841 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1843 if (Op0.Val->getOpcode() == ISD::Constant) {
1844 // For a constant, build the appropriate constant vector, which will
1845 // eventually simplify to a vector register load.
1847 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1848 SmallVector<SDOperand, 16> ConstVecValues;
1852 // Create a constant vector:
1853 switch (Op.getValueType()) {
1854 default: assert(0 && "Unexpected constant value type in "
1855 "LowerSCALAR_TO_VECTOR");
1856 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1857 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1858 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1859 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1860 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1861 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1864 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1865 for (size_t j = 0; j < n_copies; ++j)
1866 ConstVecValues.push_back(CValue);
1868 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1869 &ConstVecValues[0], ConstVecValues.size());
1871 // Otherwise, copy the value from one register to another:
1872 switch (Op0.getValueType()) {
1873 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1880 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1887 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1888 switch (Op.getValueType()) {
1890 SDOperand rA = Op.getOperand(0);
1891 SDOperand rB = Op.getOperand(1);
1892 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1893 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1894 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1895 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1897 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1901 // Multiply two v8i16 vectors (pipeline friendly version):
1902 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1903 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1904 // c) Use SELB to select upper and lower halves from the intermediate results
1906 // NOTE: We really want to move the FSMBI to earlier to actually get the
1907 // dual-issue. This code does manage to do this, even if it's a little on
1910 MachineFunction &MF = DAG.getMachineFunction();
1911 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1912 SDOperand Chain = Op.getOperand(0);
1913 SDOperand rA = Op.getOperand(0);
1914 SDOperand rB = Op.getOperand(1);
1915 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1916 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1919 DAG.getCopyToReg(Chain, FSMBIreg,
1920 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1921 DAG.getConstant(0xcccc, MVT::i32)));
1924 DAG.getCopyToReg(FSMBOp, HiProdReg,
1925 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1927 SDOperand HHProd_v4i32 =
1928 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1929 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1931 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1932 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1933 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1934 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1936 DAG.getConstant(16, MVT::i16))),
1937 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1940 // This M00sE is N@stI! (apologies to Monty Python)
1942 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1943 // is to break it all apart, sign extend, and reassemble the various
1944 // intermediate products.
1946 SDOperand rA = Op.getOperand(0);
1947 SDOperand rB = Op.getOperand(1);
1948 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1949 SDOperand c16 = DAG.getConstant(16, MVT::i32);
1952 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1953 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1954 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1956 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1958 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1961 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1962 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1964 SDOperand FSMBmask = DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1965 DAG.getConstant(0x2222, MVT::i32));
1967 SDOperand LoProdParts =
1968 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1969 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1970 LLProd, LHProd, FSMBmask));
1972 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1975 DAG.getNode(ISD::AND, MVT::v4i32,
1977 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1978 LoProdMask, LoProdMask,
1979 LoProdMask, LoProdMask));
1982 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1986 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1987 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1990 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1991 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1994 SDOperand HHProd_1 =
1995 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1996 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1997 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1998 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1999 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2002 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2004 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2008 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2010 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2011 DAG.getNode(ISD::OR, MVT::v4i32,
2016 cerr << "CellSPU: Unknown vector multiplication, got "
2017 << MVT::getValueTypeString(Op.getValueType())
2026 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2027 MachineFunction &MF = DAG.getMachineFunction();
2028 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2030 SDOperand A = Op.getOperand(0);
2031 SDOperand B = Op.getOperand(1);
2032 unsigned VT = Op.getValueType();
2034 unsigned VRegBR, VRegC;
2036 if (VT == MVT::f32) {
2037 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2038 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2040 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2041 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2043 // TODO: make sure we're feeding FPInterp the right arguments
2044 // Right now: fi B, frest(B)
2047 // (Floating Interpolate (FP Reciprocal Estimate B))
2049 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2050 DAG.getNode(SPUISD::FPInterp, VT, B,
2051 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2053 // Computes A * BRcpl and stores in a temporary register
2055 DAG.getCopyToReg(BRcpl, VRegC,
2056 DAG.getNode(ISD::FMUL, VT, A,
2057 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2058 // What's the Chain variable do? It's magic!
2059 // TODO: set Chain = Op(0).getEntryNode()
2061 return DAG.getNode(ISD::FADD, VT,
2062 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2063 DAG.getNode(ISD::FMUL, VT,
2064 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2065 DAG.getNode(ISD::FSUB, VT, A,
2066 DAG.getNode(ISD::FMUL, VT, B,
2067 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2070 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2071 unsigned VT = Op.getValueType();
2072 SDOperand N = Op.getOperand(0);
2073 SDOperand Elt = Op.getOperand(1);
2074 SDOperand ShufMask[16];
2075 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2077 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2079 int EltNo = (int) C->getValue();
2082 if (VT == MVT::i8 && EltNo >= 16)
2083 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2084 else if (VT == MVT::i16 && EltNo >= 8)
2085 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2086 else if (VT == MVT::i32 && EltNo >= 4)
2087 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2088 else if (VT == MVT::i64 && EltNo >= 2)
2089 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2091 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2092 // i32 and i64: Element 0 is the preferred slot
2093 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2096 // Need to generate shuffle mask and extract:
2097 int prefslot_begin = -1, prefslot_end = -1;
2098 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2102 prefslot_begin = prefslot_end = 3;
2106 prefslot_begin = 2; prefslot_end = 3;
2110 prefslot_begin = 0; prefslot_end = 3;
2114 prefslot_begin = 0; prefslot_end = 7;
2119 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2120 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2122 for (int i = 0; i < 16; ++i) {
2123 // zero fill uppper part of preferred slot, don't care about the
2125 unsigned int mask_val;
2127 if (i <= prefslot_end) {
2129 ((i < prefslot_begin)
2131 : elt_byte + (i - prefslot_begin));
2133 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2135 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2138 SDOperand ShufMaskVec =
2139 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2141 sizeof(ShufMask) / sizeof(ShufMask[0]));
2143 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2144 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2145 N, N, ShufMaskVec));
2149 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2150 SDOperand VecOp = Op.getOperand(0);
2151 SDOperand ValOp = Op.getOperand(1);
2152 SDOperand IdxOp = Op.getOperand(2);
2153 MVT::ValueType VT = Op.getValueType();
2155 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2156 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2158 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2159 // Use $2 because it's always 16-byte aligned and it's available:
2160 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2163 DAG.getNode(SPUISD::SHUFB, VT,
2164 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2166 DAG.getNode(SPUISD::INSERT_MASK, VT,
2167 DAG.getNode(ISD::ADD, PtrVT,
2169 DAG.getConstant(CN->getValue(),
2175 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2177 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2179 assert(Op.getValueType() == MVT::i8);
2182 assert(0 && "Unhandled i8 math operator");
2186 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2188 SDOperand N1 = Op.getOperand(1);
2189 N0 = (N0.getOpcode() != ISD::Constant
2190 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2191 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2192 N1 = (N1.getOpcode() != ISD::Constant
2193 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2194 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2195 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2196 DAG.getNode(Opc, MVT::i16, N0, N1));
2200 SDOperand N1 = Op.getOperand(1);
2202 N0 = (N0.getOpcode() != ISD::Constant
2203 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2204 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2205 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2206 N1 = (N1.getOpcode() != ISD::Constant
2207 ? DAG.getNode(N1Opc, MVT::i16, N1)
2208 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2209 SDOperand ExpandArg =
2210 DAG.getNode(ISD::OR, MVT::i16, N0,
2211 DAG.getNode(ISD::SHL, MVT::i16,
2212 N0, DAG.getConstant(8, MVT::i16)));
2213 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2214 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2218 SDOperand N1 = Op.getOperand(1);
2220 N0 = (N0.getOpcode() != ISD::Constant
2221 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2222 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2223 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2224 N1 = (N1.getOpcode() != ISD::Constant
2225 ? DAG.getNode(N1Opc, MVT::i16, N1)
2226 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2227 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2228 DAG.getNode(Opc, MVT::i16, N0, N1));
2231 SDOperand N1 = Op.getOperand(1);
2233 N0 = (N0.getOpcode() != ISD::Constant
2234 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2235 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2236 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2237 N1 = (N1.getOpcode() != ISD::Constant
2238 ? DAG.getNode(N1Opc, MVT::i16, N1)
2239 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2240 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2241 DAG.getNode(Opc, MVT::i16, N0, N1));
2244 SDOperand N1 = Op.getOperand(1);
2246 N0 = (N0.getOpcode() != ISD::Constant
2247 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2248 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2249 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2250 N1 = (N1.getOpcode() != ISD::Constant
2251 ? DAG.getNode(N1Opc, MVT::i16, N1)
2252 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2253 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2254 DAG.getNode(Opc, MVT::i16, N0, N1));
2262 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2264 MVT::ValueType VT = Op.getValueType();
2266 MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2268 SDOperand Op0 = Op.getOperand(0);
2271 case ISD::ZERO_EXTEND:
2272 case ISD::SIGN_EXTEND:
2273 case ISD::ANY_EXTEND: {
2274 MVT::ValueType Op0VT = Op0.getValueType();
2276 MVT::getVectorType(Op0VT, (128 / MVT::getSizeInBits(Op0VT)));
2278 assert(Op0VT == MVT::i32
2279 && "CellSPU: Zero/sign extending something other than i32");
2281 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2282 ? SPUISD::ROTBYTES_RIGHT_S
2283 : SPUISD::ROTQUAD_RZ_BYTES);
2284 SDOperand PromoteScalar =
2285 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2287 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2288 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2289 DAG.getNode(NewOpc, Op0VecVT,
2291 DAG.getConstant(4, MVT::i32))));
2295 SDOperand ShiftAmt = Op.getOperand(1);
2296 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2297 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2298 SDOperand MaskLower =
2299 DAG.getNode(SPUISD::SELB, VecVT,
2301 DAG.getConstant(0, VecVT),
2302 DAG.getNode(SPUISD::FSMBI, VecVT,
2303 DAG.getConstant(0xff00ULL, MVT::i16)));
2304 SDOperand ShiftAmtBytes =
2305 DAG.getNode(ISD::SRL, ShiftAmtVT,
2307 DAG.getConstant(3, ShiftAmtVT));
2308 SDOperand ShiftAmtBits =
2309 DAG.getNode(ISD::AND, ShiftAmtVT,
2311 DAG.getConstant(7, ShiftAmtVT));
2313 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2314 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2315 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2316 MaskLower, ShiftAmtBytes),
2321 unsigned VT = unsigned(Op.getValueType());
2322 SDOperand ShiftAmt = Op.getOperand(1);
2323 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2324 SDOperand ShiftAmtBytes =
2325 DAG.getNode(ISD::SRL, ShiftAmtVT,
2327 DAG.getConstant(3, ShiftAmtVT));
2328 SDOperand ShiftAmtBits =
2329 DAG.getNode(ISD::AND, ShiftAmtVT,
2331 DAG.getConstant(7, ShiftAmtVT));
2333 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2334 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2335 Op0, ShiftAmtBytes),
2343 //! Lower byte immediate operations for v16i8 vectors:
2345 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2348 MVT::ValueType VT = Op.getValueType();
2350 ConstVec = Op.getOperand(0);
2351 Arg = Op.getOperand(1);
2352 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2353 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2354 ConstVec = ConstVec.getOperand(0);
2356 ConstVec = Op.getOperand(1);
2357 Arg = Op.getOperand(0);
2358 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2359 ConstVec = ConstVec.getOperand(0);
2364 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2365 uint64_t VectorBits[2];
2366 uint64_t UndefBits[2];
2367 uint64_t SplatBits, SplatUndef;
2370 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2371 && isConstantSplat(VectorBits, UndefBits,
2372 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2373 SplatBits, SplatUndef, SplatSize)) {
2374 SDOperand tcVec[16];
2375 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2376 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2378 // Turn the BUILD_VECTOR into a set of target constants:
2379 for (size_t i = 0; i < tcVecSize; ++i)
2382 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2383 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2390 //! Lower i32 multiplication
2391 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2395 cerr << "CellSPU: Unknown LowerMUL value type, got "
2396 << MVT::getValueTypeString(Op.getValueType())
2402 SDOperand rA = Op.getOperand(0);
2403 SDOperand rB = Op.getOperand(1);
2405 return DAG.getNode(ISD::ADD, MVT::i32,
2406 DAG.getNode(ISD::ADD, MVT::i32,
2407 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2408 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2409 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2416 //! Custom lowering for CTPOP (count population)
2418 Custom lowering code that counts the number ones in the input
2419 operand. SPU has such an instruction, but it counts the number of
2420 ones per byte, which then have to be accumulated.
2422 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2423 unsigned VT = Op.getValueType();
2424 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2428 SDOperand N = Op.getOperand(0);
2429 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2431 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2432 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2434 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2438 MachineFunction &MF = DAG.getMachineFunction();
2439 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2441 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2443 SDOperand N = Op.getOperand(0);
2444 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2445 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2446 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2448 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2449 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2451 // CNTB_result becomes the chain to which all of the virtual registers
2452 // CNTB_reg, SUM1_reg become associated:
2453 SDOperand CNTB_result =
2454 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2456 SDOperand CNTB_rescopy =
2457 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2459 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2461 return DAG.getNode(ISD::AND, MVT::i16,
2462 DAG.getNode(ISD::ADD, MVT::i16,
2463 DAG.getNode(ISD::SRL, MVT::i16,
2470 MachineFunction &MF = DAG.getMachineFunction();
2471 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2473 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2474 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2476 SDOperand N = Op.getOperand(0);
2477 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2478 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2479 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2480 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2482 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2483 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2485 // CNTB_result becomes the chain to which all of the virtual registers
2486 // CNTB_reg, SUM1_reg become associated:
2487 SDOperand CNTB_result =
2488 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2490 SDOperand CNTB_rescopy =
2491 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2494 DAG.getNode(ISD::SRL, MVT::i32,
2495 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2498 DAG.getNode(ISD::ADD, MVT::i32,
2499 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2501 SDOperand Sum1_rescopy =
2502 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2505 DAG.getNode(ISD::SRL, MVT::i32,
2506 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2509 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2510 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2512 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2522 /// LowerOperation - Provide custom lowering hooks for some operations.
2525 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2527 unsigned Opc = (unsigned) Op.getOpcode();
2528 unsigned VT = (unsigned) Op.getValueType();
2532 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2533 cerr << "Op.getOpcode() = " << Opc << "\n";
2534 cerr << "*Op.Val:\n";
2541 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2543 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2544 case ISD::ConstantPool:
2545 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2546 case ISD::GlobalAddress:
2547 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2548 case ISD::JumpTable:
2549 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2551 return LowerConstant(Op, DAG);
2552 case ISD::ConstantFP:
2553 return LowerConstantFP(Op, DAG);
2555 return LowerBRCOND(Op, DAG);
2556 case ISD::FORMAL_ARGUMENTS:
2557 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2559 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2561 return LowerRET(Op, DAG, getTargetMachine());
2564 // i8, i64 math ops:
2565 case ISD::ZERO_EXTEND:
2566 case ISD::SIGN_EXTEND:
2567 case ISD::ANY_EXTEND:
2575 return LowerI8Math(Op, DAG, Opc);
2576 else if (VT == MVT::i64)
2577 return LowerI64Math(Op, DAG, Opc);
2580 // Vector-related lowering.
2581 case ISD::BUILD_VECTOR:
2582 return LowerBUILD_VECTOR(Op, DAG);
2583 case ISD::SCALAR_TO_VECTOR:
2584 return LowerSCALAR_TO_VECTOR(Op, DAG);
2585 case ISD::VECTOR_SHUFFLE:
2586 return LowerVECTOR_SHUFFLE(Op, DAG);
2587 case ISD::EXTRACT_VECTOR_ELT:
2588 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2589 case ISD::INSERT_VECTOR_ELT:
2590 return LowerINSERT_VECTOR_ELT(Op, DAG);
2592 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2596 return LowerByteImmed(Op, DAG);
2598 // Vector and i8 multiply:
2600 if (MVT::isVector(VT))
2601 return LowerVectorMUL(Op, DAG);
2602 else if (VT == MVT::i8)
2603 return LowerI8Math(Op, DAG, Opc);
2605 return LowerMUL(Op, DAG, VT, Opc);
2608 if (VT == MVT::f32 || VT == MVT::v4f32)
2609 return LowerFDIVf32(Op, DAG);
2610 // else if (Op.getValueType() == MVT::f64)
2611 // return LowerFDIVf64(Op, DAG);
2613 assert(0 && "Calling FDIV on unsupported MVT");
2616 return LowerCTPOP(Op, DAG);
2622 //===----------------------------------------------------------------------===//
2623 // Target Optimization Hooks
2624 //===----------------------------------------------------------------------===//
2627 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2630 TargetMachine &TM = getTargetMachine();
2632 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2633 SelectionDAG &DAG = DCI.DAG;
2634 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2635 SDOperand Result; // Initially, NULL result
2637 switch (N->getOpcode()) {
2640 SDOperand Op1 = N->getOperand(1);
2642 if ((Op1.getOpcode() == ISD::Constant
2643 || Op1.getOpcode() == ISD::TargetConstant)
2644 && Op0.getOpcode() == SPUISD::IndirectAddr) {
2645 SDOperand Op01 = Op0.getOperand(1);
2646 if (Op01.getOpcode() == ISD::Constant
2647 || Op01.getOpcode() == ISD::TargetConstant) {
2648 // (add <const>, (SPUindirect <arg>, <const>)) ->
2649 // (SPUindirect <arg>, <const + const>)
2650 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2651 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2652 SDOperand combinedConst =
2653 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2654 Op0.getValueType());
2656 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2657 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2658 DEBUG(cerr << "With: (SPUindirect <arg>, "
2659 << CN0->getValue() + CN1->getValue() << ")\n");
2660 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2661 Op0.getOperand(0), combinedConst);
2663 } else if ((Op0.getOpcode() == ISD::Constant
2664 || Op0.getOpcode() == ISD::TargetConstant)
2665 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2666 SDOperand Op11 = Op1.getOperand(1);
2667 if (Op11.getOpcode() == ISD::Constant
2668 || Op11.getOpcode() == ISD::TargetConstant) {
2669 // (add (SPUindirect <arg>, <const>), <const>) ->
2670 // (SPUindirect <arg>, <const + const>)
2671 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2672 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2673 SDOperand combinedConst =
2674 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2675 Op0.getValueType());
2677 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2678 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2679 DEBUG(cerr << "With: (SPUindirect <arg>, "
2680 << CN0->getValue() + CN1->getValue() << ")\n");
2682 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2683 Op1.getOperand(0), combinedConst);
2688 case ISD::SIGN_EXTEND:
2689 case ISD::ZERO_EXTEND:
2690 case ISD::ANY_EXTEND: {
2691 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2692 N->getValueType(0) == Op0.getValueType()) {
2693 // (any_extend (SPUextract_elt0 <arg>)) ->
2694 // (SPUextract_elt0 <arg>)
2695 // Types must match, however...
2696 DEBUG(cerr << "Replace: ");
2697 DEBUG(N->dump(&DAG));
2698 DEBUG(cerr << "\nWith: ");
2699 DEBUG(Op0.Val->dump(&DAG));
2700 DEBUG(cerr << "\n");
2706 case SPUISD::IndirectAddr: {
2707 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2708 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2709 if (CN->getValue() == 0) {
2710 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2711 // (SPUaform <addr>, 0)
2713 DEBUG(cerr << "Replace: ");
2714 DEBUG(N->dump(&DAG));
2715 DEBUG(cerr << "\nWith: ");
2716 DEBUG(Op0.Val->dump(&DAG));
2717 DEBUG(cerr << "\n");
2724 case SPUISD::SHLQUAD_L_BITS:
2725 case SPUISD::SHLQUAD_L_BYTES:
2726 case SPUISD::VEC_SHL:
2727 case SPUISD::VEC_SRL:
2728 case SPUISD::VEC_SRA:
2729 case SPUISD::ROTQUAD_RZ_BYTES:
2730 case SPUISD::ROTQUAD_RZ_BITS: {
2731 SDOperand Op1 = N->getOperand(1);
2733 if (isa<ConstantSDNode>(Op1)) {
2734 // Kill degenerate vector shifts:
2735 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2737 if (CN->getValue() == 0) {
2743 case SPUISD::PROMOTE_SCALAR: {
2744 switch (Op0.getOpcode()) {
2747 case ISD::ANY_EXTEND:
2748 case ISD::ZERO_EXTEND:
2749 case ISD::SIGN_EXTEND: {
2750 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2752 // but only if the SPUpromote_scalar and <arg> types match.
2753 SDOperand Op00 = Op0.getOperand(0);
2754 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2755 SDOperand Op000 = Op00.getOperand(0);
2756 if (Op000.getValueType() == N->getValueType(0)) {
2762 case SPUISD::EXTRACT_ELT0: {
2763 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2765 Result = Op0.getOperand(0);
2772 // Otherwise, return unchanged.
2775 DEBUG(cerr << "\nReplace.SPU: ");
2776 DEBUG(N->dump(&DAG));
2777 DEBUG(cerr << "\nWith: ");
2778 DEBUG(Result.Val->dump(&DAG));
2779 DEBUG(cerr << "\n");
2786 //===----------------------------------------------------------------------===//
2787 // Inline Assembly Support
2788 //===----------------------------------------------------------------------===//
2790 /// getConstraintType - Given a constraint letter, return the type of
2791 /// constraint it is for this target.
2792 SPUTargetLowering::ConstraintType
2793 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2794 if (ConstraintLetter.size() == 1) {
2795 switch (ConstraintLetter[0]) {
2802 return C_RegisterClass;
2805 return TargetLowering::getConstraintType(ConstraintLetter);
2808 std::pair<unsigned, const TargetRegisterClass*>
2809 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2810 MVT::ValueType VT) const
2812 if (Constraint.size() == 1) {
2813 // GCC RS6000 Constraint Letters
2814 switch (Constraint[0]) {
2818 return std::make_pair(0U, SPU::R64CRegisterClass);
2819 return std::make_pair(0U, SPU::R32CRegisterClass);
2822 return std::make_pair(0U, SPU::R32FPRegisterClass);
2823 else if (VT == MVT::f64)
2824 return std::make_pair(0U, SPU::R64FPRegisterClass);
2827 return std::make_pair(0U, SPU::GPRCRegisterClass);
2831 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2834 //! Compute used/known bits for a SPU operand
2836 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2840 const SelectionDAG &DAG,
2841 unsigned Depth ) const {
2842 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2844 switch (Op.getOpcode()) {
2846 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2856 case SPUISD::PROMOTE_SCALAR: {
2857 SDOperand Op0 = Op.getOperand(0);
2858 uint64_t InMask = MVT::getIntVTBitMask(Op0.getValueType());
2859 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
2860 KnownOne |= APInt(uint64_sizebits, InMask, false);
2864 case SPUISD::LDRESULT:
2865 case SPUISD::EXTRACT_ELT0:
2866 case SPUISD::EXTRACT_ELT0_CHAINED: {
2867 uint64_t InMask = MVT::getIntVTBitMask(Op.getValueType());
2868 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
2869 KnownOne |= APInt(uint64_sizebits, InMask, false);
2874 case EXTRACT_I1_ZEXT:
2875 case EXTRACT_I1_SEXT:
2876 case EXTRACT_I8_ZEXT:
2877 case EXTRACT_I8_SEXT:
2882 case SHLQUAD_L_BITS:
2883 case SHLQUAD_L_BYTES:
2889 case ROTQUAD_RZ_BYTES:
2890 case ROTQUAD_RZ_BITS:
2891 case ROTBYTES_RIGHT_S:
2893 case ROTBYTES_LEFT_CHAINED:
2904 // LowerAsmOperandForConstraint
2906 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2907 char ConstraintLetter,
2908 std::vector<SDOperand> &Ops,
2909 SelectionDAG &DAG) {
2910 // Default, for the time being, to the base class handler
2911 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2914 /// isLegalAddressImmediate - Return true if the integer value can be used
2915 /// as the offset of the target addressing mode.
2916 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2917 // SPU's addresses are 256K:
2918 return (V > -(1 << 18) && V < (1 << 18) - 1);
2921 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {