1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// Lazily-populated cache mapping SPUISD opcodes to printable names;
// filled on first call to getTargetNodeName().
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
// The value type this entry describes:
42 const MVT::ValueType valtype;
// Byte offset of the type's preferred slot within a 16-byte SPU
// register quadword (used below to compute rotate amounts).
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map:
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Look up the valtype_map entry for VT via a linear scan of the small
// table; retval stays 0 when VT has no entry (diagnosed below).
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// Diagnostic for an unmapped value type:
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
// True for any address-producing node: generic or target forms of
// global/TLS/jump-table/constant-pool/external-symbol addresses, plus
// the SPU-specific A-form address node.
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
// An indirect target is a plain register or the chained result of a
// previous load (SPUISD::LDRESULT).
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
// Constructor: registers SPU register classes and declares, per
// (opcode, type) pair, how the DAG legalizer should treat each
// operation (Legal / Promote / Expand / Custom).
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 // NOTE: i8 register class is not registered because we cannot determine when
126 // we need to zero or sign extend for custom-lowered loads and stores.
127 // NOTE: Ignore the previous note. For now. :-)
128 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
129 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
130 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
131 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
132 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
133 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
134 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
140 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
142 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
143 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
144 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
146 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
147 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
148 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
149 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
151 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
152 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
153 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
155 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
156 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
157 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
159 // SPU constant load actions are custom lowered:
160 setOperationAction(ISD::Constant, MVT::i64, Custom);
161 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
162 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
164 // SPU's loads and stores have to be custom lowered:
165 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
167 setOperationAction(ISD::LOAD, sctype, Custom);
168 setOperationAction(ISD::STORE, sctype, Custom);
171 // Custom lower BRCOND for i1, i8 to "promote" the result to
172 // i32 and i16, respectively.
173 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175 // Expand the jumptable branches
176 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
177 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
180 // SPU has no intrinsics for these particular operations:
181 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
182 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
183 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
184 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
186 // SPU, like PowerPC, has no SREM/UREM instructions
187 setOperationAction(ISD::SREM, MVT::i32, Expand);
188 setOperationAction(ISD::UREM, MVT::i32, Expand);
189 setOperationAction(ISD::SREM, MVT::i64, Expand);
190 setOperationAction(ISD::UREM, MVT::i64, Expand);
192 // We don't support sin/cos/sqrt/fmod
193 setOperationAction(ISD::FSIN , MVT::f64, Expand);
194 setOperationAction(ISD::FCOS , MVT::f64, Expand);
195 setOperationAction(ISD::FREM , MVT::f64, Expand);
196 setOperationAction(ISD::FSIN , MVT::f32, Expand);
197 setOperationAction(ISD::FCOS , MVT::f32, Expand);
198 setOperationAction(ISD::FREM , MVT::f32, Expand);
200 // If we're enabling GP optimizations, use hardware square root
201 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
202 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
204 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
205 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
207 // SPU can do rotate right and left, so legalize it... but customize for i8
208 // because instructions don't exist.
209 setOperationAction(ISD::ROTR, MVT::i32, Legal);
210 setOperationAction(ISD::ROTR, MVT::i16, Legal);
211 setOperationAction(ISD::ROTR, MVT::i8, Custom);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // And SPU needs custom lowering for shift left/right for i64
220 setOperationAction(ISD::SHL, MVT::i64, Custom);
221 setOperationAction(ISD::SRL, MVT::i64, Custom);
222 setOperationAction(ISD::SRA, MVT::i64, Custom);
224 // Custom lower i32 multiplications
225 setOperationAction(ISD::MUL, MVT::i32, Custom);
227 // Need to custom handle (some) common i8 math ops
228 setOperationAction(ISD::SUB, MVT::i8, Custom);
229 setOperationAction(ISD::MUL, MVT::i8, Custom);
231 // SPU does not have BSWAP. It does have i32 support CTLZ.
232 // CTPOP has to be custom lowered.
233 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
234 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
236 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
237 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
238 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
239 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
241 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
242 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
244 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
246 // SPU has a version of select
247 setOperationAction(ISD::SELECT, MVT::i1, Expand);
248 setOperationAction(ISD::SELECT, MVT::i8, Expand);
249 setOperationAction(ISD::SELECT, MVT::i16, Legal);
250 setOperationAction(ISD::SELECT, MVT::i32, Legal);
251 setOperationAction(ISD::SELECT, MVT::i64, Expand);
252 setOperationAction(ISD::SELECT, MVT::f32, Expand);
253 setOperationAction(ISD::SELECT, MVT::f64, Expand);
255 setOperationAction(ISD::SETCC, MVT::i1, Expand);
256 setOperationAction(ISD::SETCC, MVT::i8, Expand);
257 setOperationAction(ISD::SETCC, MVT::i16, Legal);
258 setOperationAction(ISD::SETCC, MVT::i32, Legal);
259 setOperationAction(ISD::SETCC, MVT::i64, Expand);
260 setOperationAction(ISD::SETCC, MVT::f32, Expand);
261 setOperationAction(ISD::SETCC, MVT::f64, Expand);
263 // Zero extension and sign extension for i64 have to be
265 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
266 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
267 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
269 // SPU has a legal FP -> signed INT instruction
270 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
273 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
275 // FDIV on SPU requires custom lowering
276 setOperationAction(ISD::FDIV, MVT::f32, Custom);
277 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
279 // SPU has [U|S]INT_TO_FP
280 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
284 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
285 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
286 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
289 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
290 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
291 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
292 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
294 // We cannot sextinreg(i1). Expand to shifts.
295 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
297 // Support label based line numbers.
298 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
299 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
301 // We want to legalize GlobalAddress and ConstantPool nodes into the
302 // appropriate instructions to materialize the address.
303 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
305 setOperationAction(ISD::GlobalAddress, sctype, Custom);
306 setOperationAction(ISD::ConstantPool, sctype, Custom);
307 setOperationAction(ISD::JumpTable, sctype, Custom);
310 // RET must be custom lowered, to meet ABI requirements
311 setOperationAction(ISD::RET, MVT::Other, Custom);
313 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
314 setOperationAction(ISD::VASTART , MVT::Other, Custom);
316 // Use the default implementation.
317 setOperationAction(ISD::VAARG , MVT::Other, Expand);
318 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
319 setOperationAction(ISD::VAEND , MVT::Other, Expand);
320 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
321 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
322 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
323 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
324 setOperationAction(ISD::PREFETCH , MVT::Other, Expand);
326 // Cell SPU has instructions for converting between i64 and fp.
327 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
328 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
330 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
331 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
333 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
334 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
336 // First set operation action for all vector types to expand. Then we
337 // will selectively turn on ones that can be effectively codegen'd.
338 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
339 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
341 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
342 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
345 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
346 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
347 // add/sub are legal for all supported vector VT's.
348 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
349 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
350 // mul has to be custom lowered.
351 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
353 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
354 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
355 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
356 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
357 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
358 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
360 // These operations need to be expanded:
361 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
362 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
363 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
364 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
365 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
367 // Custom lower build_vector, constant pool spills, insert and
368 // extract vector elements:
369 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
370 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
371 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
372 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
373 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
374 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8 logical/multiply ops override the generic vector settings above:
377 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
378 setOperationAction(ISD::AND, MVT::v16i8, Custom);
379 setOperationAction(ISD::OR, MVT::v16i8, Custom);
380 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
381 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
383 setSetCCResultType(MVT::i32);
384 setShiftAmountType(MVT::i32);
385 setSetCCResultContents(ZeroOrOneSetCCResult);
387 setStackPointerRegisterToSaveRestore(SPU::R1);
389 // We have target-specific dag combine patterns for the following nodes:
390 setTargetDAGCombine(ISD::ADD);
391 setTargetDAGCombine(ISD::ZERO_EXTEND);
392 setTargetDAGCombine(ISD::SIGN_EXTEND);
393 setTargetDAGCombine(ISD::ANY_EXTEND);
395 computeRegisterProperties();
// Return the printable name for an SPUISD target node opcode, or 0 for
// an unknown opcode. The node_names map is populated on first use.
399 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
401 if (node_names.empty()) {
402 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
403 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
404 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
405 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
406 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
407 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
408 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
409 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
410 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
411 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
412 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
413 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
414 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
415 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
416 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
417 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
418 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
419 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
420 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
421 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
422 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
423 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
424 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
425 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
426 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
427 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
428 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
429 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
430 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
431 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
432 "SPUISD::ROTQUAD_RZ_BYTES";
433 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
434 "SPUISD::ROTQUAD_RZ_BITS";
435 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
436 "SPUISD::ROTBYTES_RIGHT_S";
437 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
438 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
439 "SPUISD::ROTBYTES_LEFT_CHAINED";
440 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
441 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
442 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
443 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
444 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Look up the opcode; unknown opcodes yield a null name.
447 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
449 return ((i != node_names.end()) ? i->second : 0);
452 //===----------------------------------------------------------------------===//
453 // Calling convention code:
454 //===----------------------------------------------------------------------===//
456 #include "SPUGenCallingConv.inc"
458 //===----------------------------------------------------------------------===//
459 // LowerOperation implementation
460 //===----------------------------------------------------------------------===//
462 /// Aligned load common code for CellSPU
464 \param[in] Op The SelectionDAG load or store operand
465 \param[in] DAG The selection DAG
466 \param[in] ST CellSPU subtarget information structure
467 \param[in,out] alignment Caller initializes this to the load or store node's
468 value from getAlignment(), may be updated while generating the aligned load
469 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
470 offset (divisible by 16, modulo 16 == 0)
471 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
472 offset of the preferred slot (modulo 16 != 0)
473 \param[in,out] VT Caller initializes this value type to the the load or store
474 node's loaded or stored value type; may be updated if an i1-extended load or
476 \param[out] was16aligned true if the base pointer had 16-byte alignment,
477 otherwise false. Can help to determine if the chunk needs to be rotated.
479 Both load and store lowering load a block of data aligned on a 16-byte
480 boundary. This is the common aligned load code shared between both.
// Common helper for LowerLOAD/LowerSTORE: emits a v16i8 load of the
// 16-byte-aligned chunk containing the addressed element, and reports
// (via the reference out-params) the offsets needed to rotate the
// element into its preferred slot.
483 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
485 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
486 MVT::ValueType &VT, bool &was16aligned)
488 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
489 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
490 SDOperand basePtr = LSN->getBasePtr();
491 SDOperand chain = LSN->getChain();
// base + constant-offset addressing: peel off the constant so the
// base pointer can be re-aligned below.
493 if (basePtr.getOpcode() == ISD::ADD) {
494 SDOperand Op1 = basePtr.Val->getOperand(1);
496 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
497 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
499 alignOffs = (int) CN->getValue();
500 prefSlotOffs = (int) (alignOffs & 0xf);
502 // Adjust the rotation amount to ensure that the final result ends up in
503 // the preferred slot:
504 prefSlotOffs -= vtm->prefslot_byte;
505 basePtr = basePtr.getOperand(0);
507 // Loading from memory, can we adjust alignment?
508 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
509 SDOperand APtr = basePtr.getOperand(0);
510 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
511 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
512 alignment = GSDN->getGlobal()->getAlignment();
// No constant offset available: rotate purely by the preferred slot.
517 prefSlotOffs = -vtm->prefslot_byte;
521 prefSlotOffs = -vtm->prefslot_byte;
// 16-byte-aligned base: load directly as a D-form-addressable chunk.
524 if (alignment == 16) {
525 // Realign the base pointer as a D-Form address:
526 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
527 basePtr = DAG.getNode(ISD::ADD, PtrVT,
529 DAG.getConstant((alignOffs & ~0xf), PtrVT));
532 // Emit the vector load:
534 return DAG.getLoad(MVT::v16i8, chain, basePtr,
535 LSN->getSrcValue(), LSN->getSrcValueOffset(),
536 LSN->isVolatile(), 16);
539 // Unaligned load or we're using the "large memory" model, which means that
540 // we have to be very pessimistic:
541 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
542 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
546 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
547 DAG.getConstant((alignOffs & ~0xf), PtrVT));
548 was16aligned = false;
549 return DAG.getLoad(MVT::v16i8, chain, basePtr,
550 LSN->getSrcValue(), LSN->getSrcValueOffset(),
551 LSN->isVolatile(), 16);
554 /// Custom lower loads for CellSPU
556 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
557 within a 16-byte block, we have to rotate to extract the requested element.
// Custom lowering for ISD::LOAD: load the containing 16-byte chunk via
// AlignedLoad, rotate the requested element into the preferred slot,
// extract it, and apply any sign/zero extension the load requires.
560 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
561 LoadSDNode *LN = cast<LoadSDNode>(Op);
562 SDOperand the_chain = LN->getChain();
563 MVT::ValueType VT = LN->getMemoryVT();
564 MVT::ValueType OpVT = Op.Val->getValueType(0);
565 ISD::LoadExtType ExtType = LN->getExtensionType();
566 unsigned alignment = LN->getAlignment();
// Only plain (unindexed) loads are handled; other addressing modes
// fall through to the diagnostic below.
569 switch (LN->getAddressingMode()) {
570 case ISD::UNINDEXED: {
574 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
579 the_chain = result.getValue(1);
580 // Rotate the chunk if necessary
583 if (rotamt != 0 || !was16aligned) {
584 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// Rotate amount is a constant when the chunk was 16-aligned;
// otherwise it is derived from the base pointer (below).
589 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
591 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
592 LoadSDNode *LN1 = cast<LoadSDNode>(result);
593 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
594 DAG.getConstant(rotamt, PtrVT));
597 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
598 the_chain = result.getValue(1);
601 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
603 MVT::ValueType vecVT = MVT::v16i8;
605 // Convert the loaded v16i8 vector to the appropriate vector type
606 // specified by the operand:
609 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
611 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
614 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
615 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
616 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
617 the_chain = result.getValue(1);
619 // Handle the sign and zero-extending loads for i1 and i8:
622 if (ExtType == ISD::SEXTLOAD) {
623 NewOpC = (OpVT == MVT::i1
624 ? SPUISD::EXTRACT_I1_SEXT
625 : SPUISD::EXTRACT_I8_SEXT);
627 assert(ExtType == ISD::ZEXTLOAD);
628 NewOpC = (OpVT == MVT::i1
629 ? SPUISD::EXTRACT_I1_ZEXT
630 : SPUISD::EXTRACT_I8_ZEXT);
633 result = DAG.getNode(NewOpC, OpVT, result);
// Wrap in LDRESULT so both the value and the chain are returned.
636 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
637 SDOperand retops[2] = {
642 result = DAG.getNode(SPUISD::LDRESULT, retvts,
643 retops, sizeof(retops) / sizeof(retops[0]));
650 case ISD::LAST_INDEXED_MODE:
651 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
653 cerr << (unsigned) LN->getAddressingMode() << "\n";
661 /// Custom lower stores for CellSPU
663 All CellSPU stores are aligned to 16-byte boundaries, so for elements
664 within a 16-byte block, we have to generate a shuffle to insert the
665 requested element into its place, then store the resulting block.
// Custom lowering for ISD::STORE: load the containing 16-byte chunk,
// shuffle (SHUFB + INSERT_MASK) the scalar into its slot within the
// chunk, then store the whole chunk back.
668 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
669 StoreSDNode *SN = cast<StoreSDNode>(Op);
670 SDOperand Value = SN->getValue();
671 MVT::ValueType VT = Value.getValueType();
// For truncating stores the stored type is the (narrower) memory type.
672 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
673 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
674 unsigned alignment = SN->getAlignment();
676 switch (SN->getAddressingMode()) {
677 case ISD::UNINDEXED: {
678 int chunk_offset, slot_offset;
681 // The vector type we really want to load from the 16-byte chunk, except
682 // in the case of MVT::i1, which has to be v16i8.
683 unsigned vecVT, stVecVT = MVT::v16i8;
686 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
687 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// Load the chunk we will insert into (shares code with LowerLOAD).
689 SDOperand alignLoadVec =
690 AlignedLoad(Op, DAG, ST, SN, alignment,
691 chunk_offset, slot_offset, VT, was16aligned);
693 if (alignLoadVec.Val == 0)
696 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
697 SDOperand basePtr = LN->getBasePtr();
698 SDOperand the_chain = alignLoadVec.getValue(1);
699 SDOperand theValue = SN->getValue();
703 && (theValue.getOpcode() == ISD::AssertZext
704 || theValue.getOpcode() == ISD::AssertSext)) {
705 // Drill down and get the value for zero- and sign-extended
707 theValue = theValue.getOperand(0);
712 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
713 SDOperand insertEltPtr;
714 SDOperand insertEltOp;
716 // If the base pointer is already a D-form address, then just create
717 // a new D-form address with a slot offset and the original base pointer.
718 // Otherwise generate a D-form address with the slot offset relative
719 // to the stack pointer, which is always aligned.
720 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
721 DEBUG(basePtr.Val->dump(&DAG));
724 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
725 (basePtr.getOpcode() == ISD::ADD
726 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
727 insertEltPtr = basePtr;
729 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
// Build the insertion mask, merge the value into the loaded chunk,
// and write the chunk back out.
732 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
733 result = DAG.getNode(SPUISD::SHUFB, vecVT,
734 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
736 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
738 result = DAG.getStore(the_chain, result, basePtr,
739 LN->getSrcValue(), LN->getSrcValueOffset(),
740 LN->isVolatile(), LN->getAlignment());
// Diagnose unsupported (indexed) addressing modes.
// Fixed copy-paste bug: message previously named LowerLOAD/LoadSDNode.
749 case ISD::LAST_INDEXED_MODE:
750 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
752 cerr << (unsigned) SN->getAddressingMode() << "\n";
760 /// Generate the address of a constant pool entry.
// Static relocation model only: small-memory code uses an A-form
// address directly; large-memory code splits into Hi/Lo halves
// combined through an IndirectAddr node.
762 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
763 MVT::ValueType PtrVT = Op.getValueType();
764 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
765 Constant *C = CP->getConstVal();
766 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
767 SDOperand Zero = DAG.getConstant(0, PtrVT);
768 const TargetMachine &TM = DAG.getTarget();
770 if (TM.getRelocationModel() == Reloc::Static) {
771 if (!ST->usingLargeMem()) {
772 // Just return the SDOperand with the constant pool address in it.
773 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
775 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
776 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
777 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
782 "LowerConstantPool: Relocation model other than static not supported.");
// Generate the address of a jump table entry; same static-relocation
// A-form / Hi-Lo scheme as LowerConstantPool above.
787 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
788 MVT::ValueType PtrVT = Op.getValueType();
789 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
790 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
791 SDOperand Zero = DAG.getConstant(0, PtrVT);
792 const TargetMachine &TM = DAG.getTarget();
794 if (TM.getRelocationModel() == Reloc::Static) {
795 if (!ST->usingLargeMem()) {
796 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
798 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
799 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
800 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
805 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global; same static-relocation A-form /
// Hi-Lo scheme as LowerConstantPool and LowerJumpTable above.
810 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
811 MVT::ValueType PtrVT = Op.getValueType();
812 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
813 GlobalValue *GV = GSDN->getGlobal();
814 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
815 const TargetMachine &TM = DAG.getTarget();
816 SDOperand Zero = DAG.getConstant(0, PtrVT);
818 if (TM.getRelocationModel() == Reloc::Static) {
819 if (!ST->usingLargeMem()) {
820 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
822 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
823 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
824 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
827 cerr << "LowerGlobalAddress: Relocation model other than static not "
836 //! Custom lower i64 integer constants
838 This code inserts all of the necessary juggling that needs to occur to load
839 a 64-bit constant into a register.
842 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
843 unsigned VT = Op.getValueType();
844 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
// Materialize the i64 as a splatted v2i64 build_vector, then take
// element 0 (the SPU preferred slot).
846 if (VT == MVT::i64) {
847 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
848 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
849 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// Any other type reaching here is unexpected; diagnose it.
851 cerr << "LowerConstant: unhandled constant type "
852 << MVT::getValueTypeString(VT)
861 //! Custom lower double precision floating point constants
// f64 constants are materialized by bit-converting the equivalent i64
// constant (which LowerConstant above knows how to build).
863 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
864 unsigned VT = Op.getValueType();
865 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
868 "LowerConstantFP: Node is not ConstantFPSDNode");
870 if (VT == MVT::f64) {
871 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
872 return DAG.getNode(ISD::BIT_CONVERT, VT,
873 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
879 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
881 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
// Operand 1 of BRCOND is the condition value; operand 0 is the chain.
883 SDOperand Cond = Op.getOperand(1);
884 MVT::ValueType CondVT = Cond.getValueType();
885 MVT::ValueType CondNVT;
// Promote i1 -> i32 and i8 -> i16 by zero-extending the condition,
// then rebuild the BRCOND around the widened value.
887 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
888 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
889 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
891 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
894 return SDOperand(); // Unchanged
// Lower ISD::FORMAL_ARGUMENTS for the SPU: copy incoming arguments out of the
// SPU argument registers (one register class per value type), fall back to
// loads from fixed stack slots when registers run out or the function is
// varargs, and build the vararg register-spill area.
// NOTE(review): this chunk is truncated — the per-type switch heads,
// ArgRegIdx increments, needsLoad assignments and closing braces are missing
// from this view.
898 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
900 MachineFunction &MF = DAG.getMachineFunction();
901 MachineFrameInfo *MFI = MF.getFrameInfo();
902 MachineRegisterInfo &RegInfo = MF.getRegInfo();
903 SmallVector<SDOperand, 8> ArgValues;
904 SDOperand Root = Op.getOperand(0);
905 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
907 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
908 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Arguments on the stack start just above the minimal [LR]/[SP] linkage area.
910 unsigned ArgOffset = SPUFrameInfo::minStackSize();
911 unsigned ArgRegIdx = 0;
912 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
914 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
916 // Add DAG nodes to load the arguments or copy them out of registers.
917 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
919 bool needsLoad = false;
920 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
921 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
// Unhandled argument types are a hard error (abort tail not visible here).
925 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
926 << MVT::getValueTypeString(ObjectVT)
// Each value type copies from the next argument register into a virtual
// register of the matching SPU register class. NOTE(review): the register
// path is taken only for non-vararg functions — presumably vararg arguments
// always go through the stack-slot path below; confirm against the SPU ABI.
931 if (!isVarArg && ArgRegIdx < NumArgRegs) {
932 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
933 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
934 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
941 if (!isVarArg && ArgRegIdx < NumArgRegs) {
942 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
943 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
944 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
951 if (!isVarArg && ArgRegIdx < NumArgRegs) {
952 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
953 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
954 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
961 if (!isVarArg && ArgRegIdx < NumArgRegs) {
962 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
963 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
964 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
971 if (!isVarArg && ArgRegIdx < NumArgRegs) {
972 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
973 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
974 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
981 if (!isVarArg && ArgRegIdx < NumArgRegs) {
982 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
983 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
984 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments use the generic vector register class and keep ObjectVT.
996 if (!isVarArg && ArgRegIdx < NumArgRegs) {
997 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
998 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
999 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1007 // We need to load the argument to a virtual register if we determined above
1008 // that we ran out of physical registers of the appropriate type
1010 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1011 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1012 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1013 ArgOffset += StackSlotSize;
1016 ArgValues.push_back(ArgVal);
1019 // If the function takes variable number of arguments, make a frame index for
1020 // the start of the first vararg value... for expansion of llvm.va_start.
1022 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1024 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1025 // If this function is vararg, store any remaining integer argument regs to
1026 // their spots on the stack so that they may be loaded by deferencing the
1027 // result of va_next.
1028 SmallVector<SDOperand, 8> MemOps;
1029 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1030 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1031 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1032 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1033 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1034 MemOps.push_back(Store);
1035 // Increment the address by four for the next argument to store
1036 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1037 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all vararg spill stores together so they are not dead-code eliminated.
1039 if (!MemOps.empty())
1040 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1043 ArgValues.push_back(Root)
1045 // Return the new list of results.
1046 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1047 Op.Val->value_end());
1048 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1051 /// isLSAAddress - Return the immediate to use if the specified
1052 /// value is representable as a LSA address.
1053 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1054 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// (the "!C -> return 0" early exit is on a line missing from this view)
1057 int Addr = C->getValue();
// The address must be word-aligned, and shifting left/right by 14 must be
// an identity — i.e. the value fits in the sign-extended low 18 bits, which
// leaves a 16-bit immediate once the two implicit low zero bits are dropped.
1058 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1059 (Addr << 14 >> 14) != Addr)
1060 return 0; // Top 14 bits have to be sext of immediate.
1062 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL for the SPU: marshal arguments into registers / stack
// slots, select the call addressing form (PC-relative BRSL, absolute BRASL,
// or indirect for "large memory" mode), emit the call sequence, and copy the
// return value(s) out of the return registers.
// NOTE(review): this chunk is truncated — case labels of both switches,
// several else branches, the CALLSEQ_END flag operand and NumResults updates
// are missing from this view.
1067 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1068 SDOperand Chain = Op.getOperand(0);
1070 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1071 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1073 SDOperand Callee = Op.getOperand(4);
// Call node operands: (chain, cc, isvararg, istail, callee, arg/sign pairs).
1074 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1075 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1076 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1077 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1079 // Handy pointer type
1080 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1082 // Accumulate how many bytes are to be pushed on the stack, including the
1083 // linkage area, and parameter passing area. According to the SPU ABI,
1084 // we minimally need space for [LR] and [SP]
1085 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1087 // Set up a copy of the stack pointer for use loading and storing any
1088 // arguments that may not fit in the registers available for argument
1090 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1092 // Figure out which arguments are going to go in registers, and which in
1094 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1095 unsigned ArgRegIdx = 0;
1097 // Keep track of registers passing arguments
1098 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1099 // And the arguments passed on the stack
1100 SmallVector<SDOperand, 8> MemOpChains;
1102 for (unsigned i = 0; i != NumOps; ++i) {
1103 SDOperand Arg = Op.getOperand(5+2*i);
1105 // PtrOff will be used to store the current argument to the stack if a
1106 // register cannot be found for it.
1107 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1108 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1110 switch (Arg.getValueType()) {
1111 default: assert(0 && "Unexpected ValueType for argument!");
// Per-type cases: use the next argument register if available, otherwise
// store to the current stack slot and advance the offset.
1115 if (ArgRegIdx != NumArgRegs) {
1116 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1118 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1119 ArgOffset += StackSlotSize;
1124 if (ArgRegIdx != NumArgRegs) {
1125 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1127 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1128 ArgOffset += StackSlotSize;
1135 if (ArgRegIdx != NumArgRegs) {
1136 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1138 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1139 ArgOffset += StackSlotSize;
1145 // Update number of stack bytes actually used, insert a call sequence start
1146 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1147 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1149 if (!MemOpChains.empty()) {
1150 // Adjust the stack pointer for the stack arguments.
1151 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1152 &MemOpChains[0], MemOpChains.size());
1155 // Build a sequence of copy-to-reg nodes chained together with token chain
1156 // and flag operands which copy the outgoing args into the appropriate regs.
1158 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1159 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1161 InFlag = Chain.getValue(1);
1164 std::vector<MVT::ValueType> NodeTys;
1165 NodeTys.push_back(MVT::Other); // Returns a chain
1166 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1168 SmallVector<SDOperand, 8> Ops;
1169 unsigned CallOpc = SPUISD::CALL;
1171 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1172 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1173 // node so that legalize doesn't hack it.
1174 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1175 GlobalValue *GV = G->getGlobal();
1176 unsigned CalleeVT = Callee.getValueType();
1177 SDOperand Zero = DAG.getConstant(0, PtrVT);
1178 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1180 if (!ST->usingLargeMem()) {
1181 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1182 // style calls, otherwise, external symbols are BRASL calls. This assumes
1183 // that declared/defined symbols are in the same compilation unit and can
1184 // be reached through PC-relative jumps.
1187 // This may be an unsafe assumption for JIT and really large compilation
1189 if (GV->isDeclaration()) {
1190 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1192 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1195 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1197 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1199 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1200 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1201 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1202 // If this is an absolute destination address that appears to be a legal
1203 // local store address, use the munged value.
1204 Callee = SDOperand(Dest, 0);
1207 Ops.push_back(Chain);
1208 Ops.push_back(Callee);
1210 // Add argument registers to the end of the list so that they are known live
1212 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1213 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1214 RegsToPass[i].second.getValueType()));
1217 Ops.push_back(InFlag);
1218 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1219 InFlag = Chain.getValue(1);
1221 Chain = DAG.getCALLSEQ_END(Chain,
1222 DAG.getConstant(NumStackBytes, PtrVT),
1223 DAG.getConstant(0, PtrVT),
1225 if (Op.Val->getValueType(0) != MVT::Other)
1226 InFlag = Chain.getValue(1);
1228 SDOperand ResultVals[3];
1229 unsigned NumResults = 0;
1232 // If the call has results, copy the values out of the ret val registers.
1233 switch (Op.Val->getValueType(0)) {
1234 default: assert(0 && "Unexpected ret value!");
1235 case MVT::Other: break;
// An i32 result paired with a second i32 (e.g. a split i64) comes back in
// R4:R3; a lone i32 comes back in R3.
1237 if (Op.Val->getValueType(1) == MVT::i32) {
1238 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1239 ResultVals[0] = Chain.getValue(0);
1240 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1241 Chain.getValue(2)).getValue(1);
1242 ResultVals[1] = Chain.getValue(0);
1244 NodeTys.push_back(MVT::i32);
1246 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1247 ResultVals[0] = Chain.getValue(0);
1250 NodeTys.push_back(MVT::i32);
1253 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1254 ResultVals[0] = Chain.getValue(0);
1256 NodeTys.push_back(MVT::i64);
// Floating point / vector results also come back in R3, typed by the node.
1260 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1261 InFlag).getValue(1);
1262 ResultVals[0] = Chain.getValue(0);
1264 NodeTys.push_back(Op.Val->getValueType(0));
1271 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1272 InFlag).getValue(1);
1273 ResultVals[0] = Chain.getValue(0);
1275 NodeTys.push_back(Op.Val->getValueType(0));
1279 NodeTys.push_back(MVT::Other);
1281 // If the function returns void, just return the chain.
1282 if (NumResults == 0)
1285 // Otherwise, merge everything together with a MERGE_VALUES node.
1286 ResultVals[NumResults++] = Chain;
1287 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1288 ResultVals, NumResults);
1289 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the SPU return calling convention, mark the return
// registers live-out, copy each returned value into its assigned register,
// and emit the SPU RET_FLAG node (with or without a glue flag).
// NOTE(review): the Flag variable's declaration and the "if (Flag.Val)" guard
// are on lines missing from this view.
1293 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1294 SmallVector<CCValAssign, 16> RVLocs;
1295 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1296 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1297 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1298 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1300 // If this is the first return lowered for this function, add the regs to the
1301 // liveout set for the function.
1302 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1303 for (unsigned i = 0; i != RVLocs.size(); ++i)
1304 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1307 SDOperand Chain = Op.getOperand(0);
1310 // Copy the result values into the output registers.
1311 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1312 CCValAssign &VA = RVLocs[i];
1313 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, value, sign, value, sign, ...), hence i*2+1.
1314 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1315 Flag = Chain.getValue(1);
1319 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1321 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1325 //===----------------------------------------------------------------------===//
1326 // Vector related lowering:
1327 //===----------------------------------------------------------------------===//
// Returns the single constant that fills every non-undef element of the
// given BUILD_VECTOR node, or null if the elements differ, a non-constant
// element appears, or all elements are undef.
1329 static ConstantSDNode *
1330 getVecImm(SDNode *N) {
1331 SDOperand OpVal(0, 0);
1333 // Check to see if this buildvec has a single non-undef value in its elements.
1334 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1335 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// (the "if (OpVal.Val == 0)" guard line is missing from this view)
1337 OpVal = N->getOperand(i);
1338 else if (OpVal != N->getOperand(i))
1342 if (OpVal.Val != 0) {
1343 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1348 return 0; // All UNDEF: use implicit def.; not Constant node
1351 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1352 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1354 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1355 MVT::ValueType ValueType) {
1356 if (ConstantSDNode *CN = getVecImm(N)) {
1357 uint64_t Value = CN->getValue();
// For i64 splats the upper and lower 32-bit halves are compared (the check
// itself is on a line missing from this view) before collapsing to 32 bits.
1358 if (ValueType == MVT::i64) {
1359 uint64_t UValue = CN->getValue();
1360 uint32_t upper = uint32_t(UValue >> 32);
1361 uint32_t lower = uint32_t(UValue);
1364 Value = Value >> 32;
// Accept only values that fit the unsigned 18-bit immediate field.
1366 if (Value <= 0x3ffff)
1367 return DAG.getConstant(Value, ValueType);
1373 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1374 /// and the value fits into a signed 16-bit constant, and if so, return the
1376 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1377 MVT::ValueType ValueType) {
1378 if (ConstantSDNode *CN = getVecImm(N)) {
1379 int64_t Value = CN->getSignExtended();
// For i64 splats the two 32-bit halves are compared (check missing from this
// view) before collapsing the value to its upper 32 bits.
1380 if (ValueType == MVT::i64) {
1381 uint64_t UValue = CN->getValue();
1382 uint32_t upper = uint32_t(UValue >> 32);
1383 uint32_t lower = uint32_t(UValue);
1386 Value = Value >> 32;
// Signed 16-bit range check: [-32768, 32767].
1388 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1389 return DAG.getConstant(Value, ValueType);
1396 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1397 /// and the value fits into a signed 10-bit constant, and if so, return the
1399 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1400 MVT::ValueType ValueType) {
1401 if (ConstantSDNode *CN = getVecImm(N)) {
1402 int64_t Value = CN->getSignExtended();
// For i64 splats the two 32-bit halves are compared (check missing from this
// view) before collapsing the value to its upper 32 bits.
1403 if (ValueType == MVT::i64) {
1404 uint64_t UValue = CN->getValue();
1405 uint32_t upper = uint32_t(UValue >> 32);
1406 uint32_t lower = uint32_t(UValue);
1409 Value = Value >> 32;
// isS10Constant: fits the signed 10-bit immediate field of SPU instructions.
1411 if (isS10Constant(Value))
1412 return DAG.getConstant(Value, ValueType);
1418 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1419 /// and the value fits into a signed 8-bit constant, and if so, return the
1422 /// @note: The incoming vector is v16i8 because that's the only way we can load
1423 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1425 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1426 MVT::ValueType ValueType) {
1427 if (ConstantSDNode *CN = getVecImm(N)) {
1428 int Value = (int) CN->getValue();
// For a 16-bit splat, accept only values whose high byte equals the low
// byte (an i8 replicated into i16), and return just the low byte.
1429 if (ValueType == MVT::i16
1430 && Value <= 0xffff /* truncated from uint64_t */
1431 && ((short) Value >> 8) == ((short) Value & 0xff))
1432 return DAG.getConstant(Value & 0xff, ValueType);
1433 else if (ValueType == MVT::i8
1434 && (Value & 0xff) == Value)
1435 return DAG.getConstant(Value, ValueType);
1441 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1442 /// and the value fits into a signed 16-bit constant, and if so, return the
1444 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1445 MVT::ValueType ValueType) {
1446 if (ConstantSDNode *CN = getVecImm(N)) {
1447 uint64_t Value = CN->getValue();
// Accept values whose only set bits are in the upper halfword position
// (suitable for ILHU, which loads a 16-bit immediate into bits 16..31).
// NOTE(review): the i64 case masks with the 32-bit constant 0xffff0000,
// which also forces all upper-32 bits of Value to be zero — confirm this is
// the intended restriction for 64-bit splats.
1448 if ((ValueType == MVT::i32
1449 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1450 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1451 return DAG.getConstant(Value >> 16, ValueType);
1457 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value of a v4i32 constant vector as an i32 constant, or
// (on a line missing from this view) an empty SDOperand if not a splat.
1458 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1459 if (ConstantSDNode *CN = getVecImm(N)) {
1460 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1466 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
// Returns the splat value of a v2i64 constant vector as an i64 constant.
// NOTE(review): the value is cast through (unsigned) before being widened to
// i64, which truncates the upper 32 bits of the splat constant — this looks
// like a copy/paste from the v4i32 variant; confirm against the full file.
1467 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1468 if (ConstantSDNode *CN = getVecImm(N)) {
1469 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1475 // If this is a vector of constants or undefs, get the bits. A bit in
1476 // UndefBits is set if the corresponding element of the vector is an
1477 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1478 // zero. Return true if this is not an array of constants, false if it is.
1480 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1481 uint64_t UndefBits[2]) {
1482 // Start with zero'd results.
1483 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1485 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1486 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1487 SDOperand OpVal = BV->getOperand(i);
// Map element i to (which uint64_t, which bit position within it).
1489 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1490 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1492 uint64_t EltBits = 0;
1493 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record the undef element's bits in UndefBits and skip VectorBits.
1494 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1495 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1497 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1498 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1499 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements contribute their raw IEEE-754 bit patterns.
1500 const APFloat &apf = CN->getValueAPF();
1501 EltBits = (CN->getValueType(0) == MVT::f32
1502 ? FloatToBits(apf.convertToFloat())
1503 : DoubleToBits(apf.convertToDouble()));
1505 // Nonconstant element.
1509 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1512 //printf("%llx %llx %llx %llx\n",
1513 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1517 /// If this is a splat (repetition) of a value across the whole vector, return
1518 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1519 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1520 /// SplatSize = 1 byte.
1521 static bool isConstantSplat(const uint64_t Bits128[2],
1522 const uint64_t Undef128[2],
1524 uint64_t &SplatBits, uint64_t &SplatUndef,
// Fold the 128-bit value down through 64/32/16-bit views so each level can
// be compared against its own upper half, ignoring undef lanes throughout.
1526 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1527 // the same as the lower 64-bits, ignoring undefs.
1528 uint64_t Bits64 = Bits128[0] | Bits128[1];
1529 uint64_t Undef64 = Undef128[0] & Undef128[1];
1530 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1531 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1532 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1533 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1535 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1536 if (MinSplatBits < 64) {
1538 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1540 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1541 if (MinSplatBits < 32) {
1543 // If the top 16-bits are different than the lower 16-bits, ignoring
1544 // undefs, we have an i32 splat.
1545 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1546 if (MinSplatBits < 16) {
1547 // If the top 8-bits are different than the lower 8-bits, ignoring
1548 // undefs, we have an i16 splat.
1549 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1550 // Otherwise, we have an 8-bit splat.
1551 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1552 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// (the SplatSize assignments and returns for each level are on lines
// missing from this view)
1558 SplatUndef = Undef16;
1565 SplatUndef = Undef32;
1571 SplatBits = Bits128[0];
1572 SplatUndef = Undef128[0];
1578 return false; // Can't be a splat if two pieces don't match.
1581 // If this is a case we can't handle, return null and let the default
1582 // expansion code take care of it. If we CAN select this case, and if it
1583 // selects to a single instruction, return Op. Otherwise, if we can codegen
1584 // this case more efficiently than a constant pool load, lower it to the
1585 // sequence of ops that should be used.
// NOTE(review): this chunk is truncated — the switch head and its case
// labels (per element type) are missing from this view; the bodies below are
// the per-type lowering arms.
1586 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1587 MVT::ValueType VT = Op.getValueType();
1588 // If this is a vector of constants or undefs, get the bits. A bit in
1589 // UndefBits is set if the corresponding element of the vector is an
1590 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1592 uint64_t VectorBits[2];
1593 uint64_t UndefBits[2];
1594 uint64_t SplatBits, SplatUndef;
1596 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1597 || !isConstantSplat(VectorBits, UndefBits,
1598 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1599 SplatBits, SplatUndef, SplatSize))
1600 return SDOperand(); // Not a constant vector, not a splat.
// v4f32: splat the 32-bit pattern as integers and bit-convert the vector.
1605 uint32_t Value32 = SplatBits;
1606 assert(SplatSize == 4
1607 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1608 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1609 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1610 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1611 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same trick with a 64-bit integer splat.
1615 uint64_t f64val = SplatBits;
1616 assert(SplatSize == 8
1617 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1618 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1619 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1620 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1621 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1625 // 8-bit constants have to be expanded to 16-bits
1626 unsigned short Value16 = SplatBits | (SplatBits << 8);
1628 for (int i = 0; i < 8; ++i)
1629 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1630 return DAG.getNode(ISD::BIT_CONVERT, VT,
1631 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16: widen a 1-byte splat to 16 bits if needed, then splat 8 copies.
1634 unsigned short Value16;
1636 Value16 = (unsigned short) (SplatBits & 0xffff);
1638 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1639 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1641 for (int i = 0; i < 8; ++i) Ops[i] = T;
1642 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32: straightforward 4-way splat.
1645 unsigned int Value = SplatBits;
1646 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1647 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64: if both 32-bit halves match, the value is directly loadable;
// otherwise synthesize it with a byte shuffle over 32-bit splats.
1650 uint64_t val = SplatBits;
1651 uint32_t upper = uint32_t(val >> 32);
1652 uint32_t lower = uint32_t(val);
1654 if (upper == lower) {
1655 // Magic constant that can be matched by IL, ILA, et. al.
1656 SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1657 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1661 SmallVector<SDOperand, 16> ShufBytes;
1663 bool upper_special, lower_special;
1665 // NOTE: This code creates common-case shuffle masks that can be easily
1666 // detected as common expressions. It is not attempting to create highly
1667 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1669 // Detect if the upper or lower half is a special shuffle mask pattern:
1670 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1671 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1673 // Create lower vector if not a special pattern
1674 if (!lower_special) {
1675 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1676 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1677 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1678 LO32C, LO32C, LO32C, LO32C));
1681 // Create upper vector if not a special pattern
1682 if (!upper_special) {
1683 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1684 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1685 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1686 HI32C, HI32C, HI32C, HI32C));
1689 // If either upper or lower are special, then the two input operands are
1690 // the same (basically, one of them is a "don't care")
1695 if (lower_special && upper_special) {
1696 // Unhappy situation... both upper and lower are special, so punt with
1697 // a target constant:
1698 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1699 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shuffle control: special halves get the shufb "magic"
// selectors (0x80 = 0x00, 0xc0 = 0xff, 0xe0 = 0x80), normal halves select
// their source bytes from HI32/LO32.
1703 for (int i = 0; i < 4; ++i) {
1704 for (int j = 0; j < 4; ++j) {
1706 bool process_upper, process_lower;
1709 process_upper = (upper_special && (i & 1) == 0);
1710 process_lower = (lower_special && (i & 1) == 1);
1712 if (process_upper || process_lower) {
1713 if ((process_upper && upper == 0)
1714 || (process_lower && lower == 0))
1716 else if ((process_upper && upper == 0xffffffff)
1717 || (process_lower && lower == 0xffffffff))
1719 else if ((process_upper && upper == 0x80000000)
1720 || (process_lower && lower == 0x80000000))
1721 val = (j == 0 ? 0xe0 : 0x80);
1723 val = i * 4 + j + ((i & 1) * 16);
1725 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1729 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1730 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1731 &ShufBytes[0], ShufBytes.size()));
1739 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1740 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1741 /// permutation vector, V3, is monotonically increasing with one "exception"
1742 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1743 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1744 /// In either case, the net result is going to eventually invoke SHUFB to
1745 /// permute/shuffle the bytes from V1 and V2.
1747 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1748 /// control word for byte/halfword/word insertion. This takes care of a single
1749 /// element move from V2 into V1.
1751 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1752 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1753 SDOperand V1 = Op.getOperand(0);
1754 SDOperand V2 = Op.getOperand(1);
1755 SDOperand PermMask = Op.getOperand(2);
1757 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1759 // If we have a single element being moved from V1 to V2, this can be handled
1760 // using the C*[DX] compute mask instructions, but the vector elements have
1761 // to be monotonically increasing with one exception element.
1762 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1763 unsigned EltsFromV2 = 0;
1765 unsigned V2EltIdx0 = 0;
1766 unsigned CurrElt = 0;
1767 bool monotonic = true;
// V2EltIdx0 = index of the first mask element referring to V2 (assignments
// per element type are on lines missing from this view).
1768 if (EltVT == MVT::i8)
1770 else if (EltVT == MVT::i16)
1772 else if (EltVT == MVT::i32)
1775 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: count elements sourced from V2 and verify the rest are
// monotonically increasing; bail out of the scan as soon as either fails.
1777 for (unsigned i = 0, e = PermMask.getNumOperands();
1778 EltsFromV2 <= 1 && monotonic && i != e;
1781 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1784 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1786 if (SrcElt >= V2EltIdx0) {
1788 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1789 } else if (CurrElt != SrcElt) {
1796 if (EltsFromV2 == 1 && monotonic) {
1797 // Compute mask and shuffle
1798 MachineFunction &MF = DAG.getMachineFunction();
1799 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1800 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1801 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1802 // Initialize temporary register to 0
1803 SDOperand InitTempReg =
1804 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1805 // Copy register's contents as index in INSERT_MASK:
1806 SDOperand ShufMaskOp =
1807 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1808 DAG.getTargetConstant(V2Elt, MVT::i32),
1809 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1810 // Use shuffle mask in SHUFB synthetic instruction:
1811 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1813 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1814 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1816 SmallVector<SDOperand, 16> ResultMask;
1817 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1819 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1822 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices for shufb.
1824 for (unsigned j = 0; j < BytesPerElement; ++j) {
1825 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1830 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1831 &ResultMask[0], ResultMask.size());
1832 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: constants become an explicit constant
// BUILD_VECTOR (which later folds to a vector load); everything else is
// promoted to a vector via the SPU-specific PROMOTE_SCALAR node.
1836 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1837 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1839 if (Op0.Val->getOpcode() == ISD::Constant) {
1840 // For a constant, build the appropriate constant vector, which will
1841 // eventually simplify to a vector register load.
1843 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1844 SmallVector<SDOperand, 16> ConstVecValues;
1848 // Create a constant vector:
1849 switch (Op.getValueType()) {
1850 default: assert(0 && "Unexpected constant value type in "
1851 "LowerSCALAR_TO_VECTOR");
1852 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1853 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1854 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1855 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1856 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1857 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across all vector lanes.
1860 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1861 for (size_t j = 0; j < n_copies; ++j)
1862 ConstVecValues.push_back(CValue);
1864 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1865 &ConstVecValues[0], ConstVecValues.size());
1867 // Otherwise, copy the value from one register to another:
1868 switch (Op0.getValueType()) {
1869 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// (case labels for the supported scalar types are missing from this view)
1876 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Custom-lower vector multiplication, which SPU lacks as a native op.
//
// Visible cases (the case labels themselves are elided in this excerpt):
//   v4i32: synthesized from MPYH/MPYU partial products summed with ADDs.
//   v8i16: MPY of the lower halves merged with MPYHH of the upper halves
//          via an FSMBI-generated SELB mask (0xcccc selects alternating
//          16-bit fields).
//   v16i8: decomposed into 16-bit multiplies of the low/high bytes, then
//          reassembled with SELB masks, shifts, AND, and OR.
// The default case prints a diagnostic (error path continues on elided
// lines).
1883 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1884 switch (Op.getValueType()) {
1886 SDOperand rA = Op.getOperand(0);
1887 SDOperand rB = Op.getOperand(1);
// 32-bit lanes: full product = (high(rA)*rB + high(rB)*rA) << 16 semantics
// are encapsulated in MPYH; MPYU supplies the unsigned low partial product.
1888 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1889 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1890 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1891 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1893 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1897 // Multiply two v8i16 vectors (pipeline friendly version):
1898 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1899 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1900 // c) Use SELB to select upper and lower halves from the intermediate results
1902 // NOTE: We really want to move the FSMBI to earlier to actually get the
1903 // dual-issue. This code does manage to do this, even if it's a little on
1906 MachineFunction &MF = DAG.getMachineFunction();
1907 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): Chain and rA both read operand 0 — Chain is aliased to the
// first multiplicand, not a token chain. Verify against the full source.
1908 SDOperand Chain = Op.getOperand(0);
1909 SDOperand rA = Op.getOperand(0);
1910 SDOperand rB = Op.getOperand(1);
// Pin the FSMBI mask and the MPYHH product in virtual registers so the
// scheduler can issue them early (see NOTE above about dual-issue).
1911 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1912 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1915 DAG.getCopyToReg(Chain, FSMBIreg,
1916 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1917 DAG.getConstant(0xcccc, MVT::i32)));
// FSMBOp / HHProd are declared on elided lines between the copies below.
1920 DAG.getCopyToReg(FSMBOp, HiProdReg,
1921 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1923 SDOperand HHProd_v4i32 =
1924 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1925 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// SELB merges: low-half MPY product vs. upper-half product shifted left 16,
// selected per-byte by the 0xcccc FSMBI mask.
1927 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1928 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1929 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1930 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1932 DAG.getConstant(16, MVT::i16))),
1933 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1936 // This M00sE is N@stI! (apologies to Monty Python)
1938 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1939 // is to break it all apart, sign extend, and reassemble the various
1940 // intermediate products.
1942 SDOperand rA = Op.getOperand(0);
1943 SDOperand rB = Op.getOperand(1);
1944 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1945 SDOperand c16 = DAG.getConstant(16, MVT::i32);
// LLProd (declared on an elided line): 16-bit products of the low bytes.
1948 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1949 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1950 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Arithmetic-shift right by 8 to sign-extend the high byte of each halfword.
1952 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1954 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
// LHProd (elided decl): products of the high bytes, repositioned by << 8.
1957 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1958 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// 0x2222 FSMBI mask selects which byte of each halfword comes from LHProd.
1960 SDOperand FSMBmask = DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1961 DAG.getConstant(0x2222, MVT::i32));
1963 SDOperand LoProdParts =
1964 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1965 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1966 LLProd, LHProd, FSMBmask));
1968 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
// Keep only the low 16 bits of each 32-bit lane of the combined low product.
1971 DAG.getNode(ISD::AND, MVT::v4i32,
1973 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1974 LoProdMask, LoProdMask,
1975 LoProdMask, LoProdMask));
// rAH / rBH (elided decls): upper 16 bits of each 32-bit lane, sign-extended.
1978 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1979 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1982 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1983 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
// HLProd (elided decl): cross products of the upper halves' low bytes.
1986 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1987 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1990 SDOperand HHProd_1 =
1991 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1993 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1994 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1995 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
// HHProd (elided decl): SELB-merge of the upper-half partial products.
1998 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2000 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2004 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
// Final result: OR the (shifted) high product with the masked low product,
// then reinterpret as v16i8.
2006 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2007 DAG.getNode(ISD::OR, MVT::v4i32,
2012 cerr << "CellSPU: Unknown vector multiplication, got "
2013 << MVT::getValueTypeString(Op.getValueType())
//! Custom-lower f32 (and, via the VECREG path, v4f32) FDIV.
//
// SPU has no divide instruction. A/B is computed from a reciprocal
// estimate: BRcpl = fi(B, frest(B)), then one Newton-style refinement:
//   result = A*BRcpl + BRcpl * (A - B*(A*BRcpl))
// Intermediate values are pinned in virtual registers (scalar FP regs for
// f32, vector regs otherwise).
2022 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2023 MachineFunction &MF = DAG.getMachineFunction();
2024 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2026 SDOperand A = Op.getOperand(0);
2027 SDOperand B = Op.getOperand(1);
2028 unsigned VT = Op.getValueType();
2030 unsigned VRegBR, VRegC;
// Pick register class by scalar vs. vector value type.
2032 if (VT == MVT::f32) {
2033 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2034 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2036 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2037 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2039 // TODO: make sure we're feeding FPInterp the right arguments
2040 // Right now: fi B, frest(B)
2043 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl (declared on an elided line) holds the refined reciprocal of B.
2045 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2046 DAG.getNode(SPUISD::FPInterp, VT, B,
2047 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2049 // Computes A * BRcpl and stores in a temporary register
// AxBRcpl (elided decl) is the chain carrying the A*BRcpl copy-to-reg.
2051 DAG.getCopyToReg(BRcpl, VRegC,
2052 DAG.getNode(ISD::FMUL, VT, A,
2053 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2054 // What's the Chain variable do? It's magic!
2055 // TODO: set Chain = Op(0).getEntryNode()
// Refinement step: A*BRcpl + BRcpl*(A - B*(A*BRcpl)).
2057 return DAG.getNode(ISD::FADD, VT,
2058 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2059 DAG.getNode(ISD::FMUL, VT,
2060 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2061 DAG.getNode(ISD::FSUB, VT, A,
2062 DAG.getNode(ISD::FMUL, VT, B,
2063 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Custom-lower ISD::EXTRACT_VECTOR_ELT.
//
// SPU reads scalars from a vector's "preferred slot". Element 0 of i32/i64
// already sits there, so that case is a bare SPUISD::EXTRACT_ELT0.
// Otherwise a SHUFB mask is built that rotates the requested element's
// bytes into the preferred slot before extracting.
2066 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2067 unsigned VT = Op.getValueType();
2068 SDOperand N = Op.getOperand(0);
2069 SDOperand Elt = Op.getOperand(1);
2070 SDOperand ShufMask[16];
2071 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2073 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2075 int EltNo = (int) C->getValue();
// Range-check the element index per element type.
// NOTE(review): the i32/i64 messages are off by one — the conditions fire
// for slot >= 4 and >= 2, so the messages should read "> 3" and "> 1".
2078 if (VT == MVT::i8 && EltNo >= 16)
2079 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2080 else if (VT == MVT::i16 && EltNo >= 8)
2081 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2082 else if (VT == MVT::i32 && EltNo >= 4)
2083 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2084 else if (VT == MVT::i64 && EltNo >= 2)
2085 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2087 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2088 // i32 and i64: Element 0 is the preferred slot
2089 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2092 // Need to generate shuffle mask and extract:
2093 int prefslot_begin = -1, prefslot_end = -1;
2094 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte ranges per element size (the switch/case labels that
// select among these assignments are elided in this excerpt).
2098 prefslot_begin = prefslot_end = 3;
2102 prefslot_begin = 2; prefslot_end = 3;
2106 prefslot_begin = 0; prefslot_end = 3;
2110 prefslot_begin = 0; prefslot_end = 7;
2115 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2116 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask: bytes inside the preferred slot pull the
// requested element's bytes; the rest repeat the pattern (don't-care).
2118 for (int i = 0; i < 16; ++i) {
2119 // zero fill uppper part of preferred slot, don't care about the
2121 unsigned int mask_val;
2123 if (i <= prefslot_end) {
2125 ((i < prefslot_begin)
2127 : elt_byte + (i - prefslot_begin));
2129 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2131 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2134 SDOperand ShufMaskVec =
2135 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2137 sizeof(ShufMask) / sizeof(ShufMask[0]));
2139 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2140 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2141 N, N, ShufMaskVec));
//! Custom-lower ISD::INSERT_VECTOR_ELT.
//
// The scalar is promoted to a vector, then SHUFB-merged into the target
// vector using an INSERT_MASK computed from the (constant) element index.
// The index must be a ConstantSDNode.
2145 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2146 SDOperand VecOp = Op.getOperand(0);
2147 SDOperand ValOp = Op.getOperand(1);
2148 SDOperand IdxOp = Op.getOperand(2);
2149 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> already asserts on failure, so the following assert
// on CN is redundant (but harmless).
2151 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2152 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2154 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2155 // Use $2 because it's always 16-byte aligned and it's available:
2156 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// SHUFB(scalar-as-vector, <elided operand>, mask(R2 + index)) — part of the
// operand list is elided in this excerpt.
2159 DAG.getNode(SPUISD::SHUFB, VT,
2160 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2162 DAG.getNode(SPUISD::INSERT_MASK, VT,
2163 DAG.getNode(ISD::ADD, PtrVT,
2165 DAG.getConstant(CN->getValue(),
//! Custom-lower i8 arithmetic (SPU has no native 8-bit math).
//
// Strategy: widen both operands to i16 (sign- or zero-extend depending on
// the operation's signedness; constants are re-emitted as i16 constants),
// perform the operation at i16, then TRUNCATE back to i8.  The case labels
// selecting each arm (sub, mul/rotate, shifts, sra, etc.) are elided in
// this excerpt.
2171 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2173 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2175 assert(Op.getValueType() == MVT::i8);
2178 assert(0 && "Unhandled i8 math operator");
2182 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2184 SDOperand N1 = Op.getOperand(1);
2185 N0 = (N0.getOpcode() != ISD::Constant
2186 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2187 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2188 N1 = (N1.getOpcode() != ISD::Constant
2189 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2190 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16))
2191 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2192 DAG.getNode(Opc, MVT::i16, N0, N1));
// This arm replicates N0's byte into both halves of an i16 (N0 | N0 << 8)
// before applying Opc — consistent with a rotate, where the low byte must
// wrap into the high byte.  Verify against the elided case label.
2196 SDOperand N1 = Op.getOperand(1);
2198 N0 = (N0.getOpcode() != ISD::Constant
2199 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2200 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2201 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2202 N1 = (N1.getOpcode() != ISD::Constant
2203 ? DAG.getNode(N1Opc, MVT::i16, N1)
2204 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2205 SDOperand ExpandArg =
2206 DAG.getNode(ISD::OR, MVT::i16, N0,
2207 DAG.getNode(ISD::SHL, MVT::i16,
2208 N0, DAG.getConstant(8, MVT::i16)));
2209 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2210 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Unsigned arm: zero-extend both operands to i16.
2214 SDOperand N1 = Op.getOperand(1);
2216 N0 = (N0.getOpcode() != ISD::Constant
2217 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2218 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2219 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2220 N1 = (N1.getOpcode() != ISD::Constant
2221 ? DAG.getNode(N1Opc, MVT::i16, N1)
2222 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2223 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2224 DAG.getNode(Opc, MVT::i16, N0, N1));
// Signed arm: sign-extend both operands to i16.
2227 SDOperand N1 = Op.getOperand(1);
2229 N0 = (N0.getOpcode() != ISD::Constant
2230 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2231 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2232 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2233 N1 = (N1.getOpcode() != ISD::Constant
2234 ? DAG.getNode(N1Opc, MVT::i16, N1)
2235 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2236 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2237 DAG.getNode(Opc, MVT::i16, N0, N1));
// Second signed arm (identical shape to the previous one; the case label
// distinguishing them is elided).
2240 SDOperand N1 = Op.getOperand(1);
2242 N0 = (N0.getOpcode() != ISD::Constant
2243 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2244 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2245 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2246 N1 = (N1.getOpcode() != ISD::Constant
2247 ? DAG.getNode(N1Opc, MVT::i16, N1)
2248 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2249 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2250 DAG.getNode(Opc, MVT::i16, N0, N1));
//! Custom-lower i64 operations via 128-bit quadword vector ops.
//
// The i64 value is promoted into a quadword vector, operated on with SPU
// quad shift/rotate nodes, and the result extracted from the preferred
// slot.  Visible arms: zero/sign/any-extend from i32, shift-left (SHL),
// and logical shift-right (SRL); case labels are elided in this excerpt.
2258 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2260 MVT::ValueType VT = Op.getValueType();
// VecVT (elided decl): the 128-bit vector type with VT-sized elements.
2262 MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2264 SDOperand Op0 = Op.getOperand(0);
2267 case ISD::ZERO_EXTEND:
2268 case ISD::SIGN_EXTEND:
2269 case ISD::ANY_EXTEND: {
2270 MVT::ValueType Op0VT = Op0.getValueType();
2272 MVT::getVectorType(Op0VT, (128 / MVT::getSizeInBits(Op0VT)));
2274 assert(Op0VT == MVT::i32
2275 && "CellSPU: Zero/sign extending something other than i32");
// Sign extend uses an arithmetic byte rotate (replicates the sign);
// zero/any extend use a zero-filling quadword rotate.  Rotate by 4 bytes
// repositions the i32 into the i64 preferred slot.
2277 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2278 ? SPUISD::ROTBYTES_RIGHT_S
2279 : SPUISD::ROTQUAD_RZ_BYTES);
2280 SDOperand PromoteScalar =
2281 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2283 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2284 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2285 DAG.getNode(NewOpc, Op0VecVT,
2287 DAG.getConstant(4, MVT::i32))));
// SHL arm: mask the lower quadword half, then shift by whole bytes
// (amount >> 3) followed by residual bits (amount & 7).
2291 SDOperand ShiftAmt = Op.getOperand(1);
2292 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2293 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2294 SDOperand MaskLower =
2295 DAG.getNode(SPUISD::SELB, VecVT,
2297 DAG.getConstant(0, VecVT),
2298 DAG.getNode(SPUISD::FSMBI, VecVT,
2299 DAG.getConstant(0xff00ULL, MVT::i16)));
2300 SDOperand ShiftAmtBytes =
2301 DAG.getNode(ISD::SRL, ShiftAmtVT,
2303 DAG.getConstant(3, ShiftAmtVT));
2304 SDOperand ShiftAmtBits =
2305 DAG.getNode(ISD::AND, ShiftAmtVT,
2307 DAG.getConstant(7, ShiftAmtVT));
2309 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2310 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2311 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2312 MaskLower, ShiftAmtBytes),
// SRL arm: same byte/bit decomposition with zero-filling right rotates.
// NOTE(review): this arm shadows VT with an unsigned local.
2317 unsigned VT = unsigned(Op.getValueType());
2318 SDOperand ShiftAmt = Op.getOperand(1);
2319 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2320 SDOperand ShiftAmtBytes =
2321 DAG.getNode(ISD::SRL, ShiftAmtVT,
2323 DAG.getConstant(3, ShiftAmtVT));
2324 SDOperand ShiftAmtBits =
2325 DAG.getNode(ISD::AND, ShiftAmtVT,
2327 DAG.getConstant(7, ShiftAmtVT));
2329 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2330 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2331 Op0, ShiftAmtBytes),
2339 //! Lower byte immediate operations for v16i8 vectors:
//! Lower AND/OR/XOR with a splatted byte constant to the byte-immediate form.
//
// Locates the BUILD_VECTOR operand (looking through BIT_CONVERT on either
// side of the binary op), and if it is a uniform byte splat, rebuilds it as
// a vector of target constants so instruction selection can use the *BI
// (byte-immediate) instruction forms.
2341 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
// ConstVec / Arg are declared on elided lines just above here.
2344 MVT::ValueType VT = Op.getValueType();
2346 ConstVec = Op.getOperand(0);
2347 Arg = Op.getOperand(1);
2348 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2349 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2350 ConstVec = ConstVec.getOperand(0);
// The constant wasn't operand 0; try operand 1 with Arg/ConstVec swapped.
2352 ConstVec = Op.getOperand(1);
2353 Arg = Op.getOperand(0);
2354 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2355 ConstVec = ConstVec.getOperand(0);
2360 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2361 uint64_t VectorBits[2];
2362 uint64_t UndefBits[2];
2363 uint64_t SplatBits, SplatUndef;
// SplatSize is declared on an elided line.
2366 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2367 && isConstantSplat(VectorBits, UndefBits,
2368 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2369 SplatBits, SplatUndef, SplatSize)) {
2370 SDOperand tcVec[16];
2371 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2372 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2374 // Turn the BUILD_VECTOR into a set of target constants:
2375 for (size_t i = 0; i < tcVecSize; ++i)
// (loop body assigning tcVec[i] = tc is on an elided line)
2378 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2379 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2386 //! Lower i32 multiplication
//! Custom-lower scalar i32 MUL (SPU only multiplies 16-bit halves natively).
//
// Full 32-bit product = MPYH(a,b) + MPYH(b,a) + MPYU(a,b): the two high-half
// cross products plus the unsigned low-half product.  Non-i32 types hit the
// diagnostic default (error path continues on elided lines).
2387 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2391 cerr << "CellSPU: Unknown LowerMUL value type, got "
2392 << MVT::getValueTypeString(Op.getValueType())
2398 SDOperand rA = Op.getOperand(0);
2399 SDOperand rB = Op.getOperand(1);
2401 return DAG.getNode(ISD::ADD, MVT::i32,
2402 DAG.getNode(ISD::ADD, MVT::i32,
2403 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2404 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA),
2405 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2412 //! Custom lowering for CTPOP (count population)
2414 Custom lowering code that counts the number ones in the input
2415 operand. SPU has such an instruction, but it counts the number of
2416 ones per byte, which then have to be accumulated.
//! Custom-lower CTPOP using SPU's per-byte CNTB instruction.
//
// The scalar is promoted into a vector, CNTB counts ones per byte, and the
// per-byte counts are summed with shift/add/mask sequences whose width
// depends on the value type (i8 needs no summing; i16 folds one byte pair;
// i32 folds via two shift/add rounds pinned in virtual registers).  The
// case labels selecting each arm are elided in this excerpt.
2418 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2419 unsigned VT = Op.getValueType();
2420 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8 arm: the byte's CNTB result is the answer.
2424 SDOperand N = Op.getOperand(0);
2425 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2427 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2428 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2430 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 arm: (counts + (counts >> 8)) & 0x0f sums the two byte counts.
2434 MachineFunction &MF = DAG.getMachineFunction();
2435 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2437 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2439 SDOperand N = Op.getOperand(0);
2440 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2441 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2442 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2444 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2445 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2447 // CNTB_result becomes the chain to which all of the virtual registers
2448 // CNTB_reg, SUM1_reg become associated:
2449 SDOperand CNTB_result =
2450 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2452 SDOperand CNTB_rescopy =
2453 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2455 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2457 return DAG.getNode(ISD::AND, MVT::i16,
2458 DAG.getNode(ISD::ADD, MVT::i16,
2459 DAG.getNode(ISD::SRL, MVT::i16,
// i32 arm: two rounds of (sum + (sum >> k)) then mask with 0xff.
2466 MachineFunction &MF = DAG.getMachineFunction();
2467 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2469 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2470 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2472 SDOperand N = Op.getOperand(0);
2473 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2474 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2475 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2476 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2478 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2479 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2481 // CNTB_result becomes the chain to which all of the virtual registers
2482 // CNTB_reg, SUM1_reg become associated:
2483 SDOperand CNTB_result =
2484 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2486 SDOperand CNTB_rescopy =
2487 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// Comp1 (elided decl): byte counts shifted right 16.
2490 DAG.getNode(ISD::SRL, MVT::i32,
2491 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
// Sum1 (elided decl): first partial sum.
2494 DAG.getNode(ISD::ADD, MVT::i32,
2495 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
// NOTE(review): Sum1_rescopy chains off CNTB_result rather than Sum1 —
// verify against the full source that this ordering is intended.
2497 SDOperand Sum1_rescopy =
2498 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Comp2 (elided decl): partial sum shifted right 8.
2501 DAG.getNode(ISD::SRL, MVT::i32,
2502 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
// Sum2 (elided decl): second partial sum.
2505 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2506 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2508 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2518 /// LowerOperation - Provide custom lowering hooks for some operations.
//! Top-level custom-lowering dispatch for the SPU target.
//
// Routes each ISD opcode marked "Custom" in the constructor to its lowering
// helper.  The unreachable default prints diagnostics; several case labels
// and the trailing return are elided in this excerpt.
2521 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2523 unsigned Opc = (unsigned) Op.getOpcode();
2524 unsigned VT = (unsigned) Op.getValueType();
2528 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2529 cerr << "Op.getOpcode() = " << Opc << "\n";
2530 cerr << "*Op.Val:\n";
2537 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2539 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2540 case ISD::ConstantPool:
2541 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2542 case ISD::GlobalAddress:
2543 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2544 case ISD::JumpTable:
2545 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2547 return LowerConstant(Op, DAG);
2548 case ISD::ConstantFP:
2549 return LowerConstantFP(Op, DAG);
2551 return LowerBRCOND(Op, DAG);
2552 case ISD::FORMAL_ARGUMENTS:
2553 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2555 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2557 return LowerRET(Op, DAG, getTargetMachine());
2560 // i8, i64 math ops:
2561 case ISD::ZERO_EXTEND:
2562 case ISD::SIGN_EXTEND:
2563 case ISD::ANY_EXTEND:
// (additional case labels elided) — dispatch by result width:
2571 return LowerI8Math(Op, DAG, Opc);
2572 else if (VT == MVT::i64)
2573 return LowerI64Math(Op, DAG, Opc);
2576 // Vector-related lowering.
2577 case ISD::BUILD_VECTOR:
2578 return LowerBUILD_VECTOR(Op, DAG);
2579 case ISD::SCALAR_TO_VECTOR:
2580 return LowerSCALAR_TO_VECTOR(Op, DAG);
2581 case ISD::VECTOR_SHUFFLE:
2582 return LowerVECTOR_SHUFFLE(Op, DAG);
2583 case ISD::EXTRACT_VECTOR_ELT:
2584 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2585 case ISD::INSERT_VECTOR_ELT:
2586 return LowerINSERT_VECTOR_ELT(Op, DAG);
2588 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2592 return LowerByteImmed(Op, DAG);
2594 // Vector and i8 multiply:
2596 if (MVT::isVector(VT))
2597 return LowerVectorMUL(Op, DAG);
2598 else if (VT == MVT::i8)
2599 return LowerI8Math(Op, DAG, Opc);
2601 return LowerMUL(Op, DAG, VT, Opc);
2604 if (VT == MVT::f32 || VT == MVT::v4f32)
2605 return LowerFDIVf32(Op, DAG);
2606 // else if (Op.getValueType() == MVT::f64)
2607 // return LowerFDIVf64(Op, DAG);
2609 assert(0 && "Calling FDIV on unsupported MVT");
2612 return LowerCTPOP(Op, DAG);
2618 //===----------------------------------------------------------------------===//
2619 // Target Optimization Hooks
2620 //===----------------------------------------------------------------------===//
//! Target-specific DAG combines for SPU nodes.
//
// Visible folds:
//  - ADD of a constant with (SPUindirect <arg>, <const>) in either operand
//    order -> SPUindirect with the constants summed.
//  - sign/zero/any-extend of SPUextract_elt0 with matching types ->
//    the extract itself.
//  - (SPUindirect (SPUaform <addr>, 0), 0) -> the SPUaform, when not in
//    large-memory mode.
//  - degenerate (zero-amount) SPU vector shifts/rotates -> eliminated.
//  - SPUpromote_scalar of (extend (SPUextract_elt0 ...)) or of
//    SPUextract_elt0 -> simplified when types line up.
// Returns the replacement node, or the unchanged default (final return is
// elided in this excerpt).
2623 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2626 TargetMachine &TM = getTargetMachine();
2628 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2629 SelectionDAG &DAG = DCI.DAG;
2630 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2631 SDOperand Result; // Initially, NULL result
2633 switch (N->getOpcode()) {
2636 SDOperand Op1 = N->getOperand(1);
2638 if ((Op1.getOpcode() == ISD::Constant
2639 || Op1.getOpcode() == ISD::TargetConstant)
2640 && Op0.getOpcode() == SPUISD::IndirectAddr) {
2641 SDOperand Op01 = Op0.getOperand(1);
2642 if (Op01.getOpcode() == ISD::Constant
2643 || Op01.getOpcode() == ISD::TargetConstant) {
2644 // (add <const>, (SPUindirect <arg>, <const>)) ->
2645 // (SPUindirect <arg>, <const + const>)
2646 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2647 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2648 SDOperand combinedConst =
2649 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2650 Op0.getValueType());
2652 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2653 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2654 DEBUG(cerr << "With: (SPUindirect <arg>, "
2655 << CN0->getValue() + CN1->getValue() << ")\n");
2656 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2657 Op0.getOperand(0), combinedConst);
// Mirror case: constant on the left, SPUindirect on the right.
2659 } else if ((Op0.getOpcode() == ISD::Constant
2660 || Op0.getOpcode() == ISD::TargetConstant)
2661 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2662 SDOperand Op11 = Op1.getOperand(1);
2663 if (Op11.getOpcode() == ISD::Constant
2664 || Op11.getOpcode() == ISD::TargetConstant) {
2665 // (add (SPUindirect <arg>, <const>), <const>) ->
2666 // (SPUindirect <arg>, <const + const>)
2667 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2668 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2669 SDOperand combinedConst =
2670 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2671 Op0.getValueType());
2673 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2674 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2675 DEBUG(cerr << "With: (SPUindirect <arg>, "
2676 << CN0->getValue() + CN1->getValue() << ")\n");
2678 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2679 Op1.getOperand(0), combinedConst);
2684 case ISD::SIGN_EXTEND:
2685 case ISD::ZERO_EXTEND:
2686 case ISD::ANY_EXTEND: {
2687 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2688 N->getValueType(0) == Op0.getValueType()) {
2689 // (any_extend (SPUextract_elt0 <arg>)) ->
2690 // (SPUextract_elt0 <arg>)
2691 // Types must match, however...
2692 DEBUG(cerr << "Replace: ");
2693 DEBUG(N->dump(&DAG));
2694 DEBUG(cerr << "\nWith: ");
2695 DEBUG(Op0.Val->dump(&DAG));
2696 DEBUG(cerr << "\n");
// (assignment of Result and break are on elided lines)
2702 case SPUISD::IndirectAddr: {
2703 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2704 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2705 if (CN->getValue() == 0) {
2706 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2707 // (SPUaform <addr>, 0)
2709 DEBUG(cerr << "Replace: ");
2710 DEBUG(N->dump(&DAG));
2711 DEBUG(cerr << "\nWith: ");
2712 DEBUG(Op0.Val->dump(&DAG));
2713 DEBUG(cerr << "\n");
2720 case SPUISD::SHLQUAD_L_BITS:
2721 case SPUISD::SHLQUAD_L_BYTES:
2722 case SPUISD::VEC_SHL:
2723 case SPUISD::VEC_SRL:
2724 case SPUISD::VEC_SRA:
2725 case SPUISD::ROTQUAD_RZ_BYTES:
2726 case SPUISD::ROTQUAD_RZ_BITS: {
2727 SDOperand Op1 = N->getOperand(1);
2729 if (isa<ConstantSDNode>(Op1)) {
2730 // Kill degenerate vector shifts:
2731 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2733 if (CN->getValue() == 0) {
// (Result = Op0 on an elided line)
2739 case SPUISD::PROMOTE_SCALAR: {
2740 switch (Op0.getOpcode()) {
2743 case ISD::ANY_EXTEND:
2744 case ISD::ZERO_EXTEND:
2745 case ISD::SIGN_EXTEND: {
2746 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2748 // but only if the SPUpromote_scalar and <arg> types match.
2749 SDOperand Op00 = Op0.getOperand(0);
2750 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2751 SDOperand Op000 = Op00.getOperand(0);
2752 if (Op000.getValueType() == N->getValueType(0)) {
2758 case SPUISD::EXTRACT_ELT0: {
2759 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2761 Result = Op0.getOperand(0);
2768 // Otherwise, return unchanged.
2771 DEBUG(cerr << "\nReplace.SPU: ");
2772 DEBUG(N->dump(&DAG));
2773 DEBUG(cerr << "\nWith: ");
2774 DEBUG(Result.Val->dump(&DAG));
2775 DEBUG(cerr << "\n");
2782 //===----------------------------------------------------------------------===//
2783 // Inline Assembly Support
2784 //===----------------------------------------------------------------------===//
2786 /// getConstraintType - Given a constraint letter, return the type of
2787 /// constraint it is for this target.
// Classify single-letter inline-asm constraints; the letters handled (case
// labels elided here) map to C_RegisterClass, everything else defers to the
// base TargetLowering implementation.
2788 SPUTargetLowering::ConstraintType
2789 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2790 if (ConstraintLetter.size() == 1) {
2791 switch (ConstraintLetter[0]) {
2798 return C_RegisterClass;
2801 return TargetLowering::getConstraintType(ConstraintLetter);
//! Map a single-letter inline-asm constraint (GCC RS6000-style letters) and
//! value type to an SPU register class; defers to the base class otherwise.
2804 std::pair<unsigned, const TargetRegisterClass*>
2805 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2806 MVT::ValueType VT) const
2808 if (Constraint.size() == 1) {
2809 // GCC RS6000 Constraint Letters
2810 switch (Constraint[0]) {
// NOTE(review): the two back-to-back returns below imply an elided
// `if (VT == MVT::i64)` guard between them — R64C for i64, R32C otherwise.
2814 return std::make_pair(0U, SPU::R64CRegisterClass);
2815 return std::make_pair(0U, SPU::R32CRegisterClass);
2818 return std::make_pair(0U, SPU::R32FPRegisterClass);
2819 else if (VT == MVT::f64)
2820 return std::make_pair(0U, SPU::R64FPRegisterClass);
2823 return std::make_pair(0U, SPU::GPRCRegisterClass);
2827 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2830 //! Compute used/known bits for a SPU operand
//! Report known zero/one bits for SPU-specific nodes to the DAG combiner.
2832 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2836 const SelectionDAG &DAG,
2837 unsigned Depth ) const {
2838 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2840 switch (Op.getOpcode()) {
2842 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
// For a promoted scalar, the bits above the source type's width are zero.
// NOTE(review): OR-ing InMask into KnownOne asserts that every in-range bit
// is known to be ONE, which looks incorrect for an arbitrary promoted
// value — verify against the full source / later revisions.
2852 case SPUISD::PROMOTE_SCALAR: {
2853 SDOperand Op0 = Op.getOperand(0);
2854 uint64_t InMask = MVT::getIntVTBitMask(Op0.getValueType());
2855 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
2856 KnownOne |= APInt(uint64_sizebits, InMask, false);
// Same pattern for loads/extracts (same KnownOne caveat as above).
2860 case SPUISD::LDRESULT:
2861 case SPUISD::EXTRACT_ELT0:
2862 case SPUISD::EXTRACT_ELT0_CHAINED: {
2863 uint64_t InMask = MVT::getIntVTBitMask(Op.getValueType());
2864 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
2865 KnownOne |= APInt(uint64_sizebits, InMask, false);
// Remaining SPU nodes are listed but contribute no known bits here.
2870 case EXTRACT_I1_ZEXT:
2871 case EXTRACT_I1_SEXT:
2872 case EXTRACT_I8_ZEXT:
2873 case EXTRACT_I8_SEXT:
2878 case SHLQUAD_L_BITS:
2879 case SHLQUAD_L_BYTES:
2885 case ROTQUAD_RZ_BYTES:
2886 case ROTQUAD_RZ_BITS:
2887 case ROTBYTES_RIGHT_S:
2889 case ROTBYTES_LEFT_CHAINED:
2900 // LowerAsmOperandForConstraint
// No SPU-specific inline-asm operand handling yet; delegate entirely to the
// base class.
2902 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2903 char ConstraintLetter,
2904 std::vector<SDOperand> &Ops,
2905 SelectionDAG &DAG) {
2906 // Default, for the time being, to the base class handler
2907 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2910 /// isLegalAddressImmediate - Return true if the integer value can be used
2911 /// as the offset of the target addressing mode.
// Accept any immediate offset in the open-ish range (-2^18, 2^18 - 1) —
// SPU d-form addressing reaches 256K.  Ty is unused here.
2912 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2913 // SPU's addresses are 256K:
2914 return (V > -(1 << 18) && V < (1 << 18) - 1);
2917 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {