//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

namespace {
  // Used in getTargetNodeName() below
  std::map<unsigned, const char *> node_names;
  //! MVT::ValueType mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT::ValueType valtype;
    const int prefslot_byte;
  };

  // (Entries reconstructed from the preferred-slot rule; see note below.)
  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
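  // Background on the preferred slot (context for the table above): the SPU
  // operates on 128-bit registers, and a scalar value lives in the register's
  // "preferred slot", word element 0. Sub-word types sit in the low-order
  // bytes of that word, so an i8 occupies byte 3, an i16 bytes 2-3, and word
  // or larger types begin at byte 0. prefslot_byte records that byte offset
  // so the load lowering code can rotate a value into position.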
  const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
           << "\n";
      abort();
    }
#endif

    return retval;
  }
  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }

  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}
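// For reference, the SPU addressing forms these predicates distinguish:
// A-form is an absolute 18-bit address (usable only in the small memory
// model), D-form is register + signed displacement, and X-form is
// register + register. isMemoryOperand() matches nodes that can fold into
// an A-form address; isIndirectOperand() matches values that must be
// addressed through a register.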
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8,   MVT::i1, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::LOAD,  sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);
  }
  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no hardware square root, so expand FSQRT:
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8,  Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);
  setOperationAction(ISD::MUL, MVT::i8,  Custom);

  // SPU does not have BSWAP, but it does have CTLZ support for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);
  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i1,  Promote);
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1,  Promote);
  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION,  MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    setOperationAction(ISD::GlobalAddress, sctype, Custom);
    setOperationAction(ISD::ConstantPool,  sctype, Custom);
    setOperationAction(ISD::JumpTable,     sctype, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR,     (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE,  (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool,       (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] =
      "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT::ValueType
SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
  MVT::ValueType VT = Op.getValueType();
  if (MVT::isInteger(VT))
    return VT;
  else
    return MVT::i32;
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in] LSN The load/store base node
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(); may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (a multiple of 16)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  rotation needed to bring the loaded value into its preferred slot
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store is encountered
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
static SDOperand
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT::ValueType &VT, bool &was16aligned)
{
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDOperand Op1 = basePtr.Val->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDOperand APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}
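// Illustrative walk-through (not part of the original commentary): an i32
// load from offset 4 in a 16-byte-aligned object produces alignOffs = 4 and,
// since the i32 preferred-slot byte is 0, prefSlotOffs = 4. AlignedLoad
// returns the whole enclosing quadword; LowerLOAD below then rotates it left
// by 4 bytes so the requested word lands in the preferred slot, where
// EXTRACT_ELT0 can read it out.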
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getMemoryVT();
  MVT::ValueType OpVT = Op.Val->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDOperand Ops[8];

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDOperand result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT,
                  was16aligned);

    if (result.Val == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt < 0)
      rotamt += 16;
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      Ops[0] = the_chain;
      Ops[1] = result;
      if (was16aligned) {
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT::ValueType vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT) {
        if (VT != MVT::i1)
          vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
      } else
        vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT = MVT::v16i8;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    SDOperand alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.Val == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDOperand basePtr = LN->getBasePtr();
    SDOperand the_chain = alignLoadVec.getValue(1);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    chunk_offset &= 0xf;

    SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDOperand insertEltPtr;
    SDOperand insertEltOp;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.Val->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }

    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
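// In effect, LowerSTORE emits a read-modify-write sequence: AlignedLoad
// fetches the enclosing 16-byte chunk, INSERT_MASK (which selects to one of
// the cbd/chd/cwd/cdd control-word instructions) builds an insertion mask,
// SHUFB merges the new value into its slot, and the rewritten chunk is
// stored back.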
/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDOperand();
}
static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}
//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << MVT::getValueTypeString(VT)
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower double precision floating point constants
static SDOperand
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDOperand();
}

//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDOperand
LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
{
  SDOperand Cond = Op.getOperand(1);
  MVT::ValueType CondVT = Cond.getValueType();
  MVT::ValueType CondNVT;

  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
    return SDOperand(); // Unchanged
}
static SDOperand
LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    switch (ObjectVT) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << MVT::getValueTypeString(ObjectVT)
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v2i64:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument to
      // store
      SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||       // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}
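// The checks above encode the local-store address constraints: the low two
// bits must be zero (word alignment), and (Addr << 14 >> 14) == Addr holds
// exactly when the 32-bit value fits in a sign-extended 18-bit field. The
// returned node holds the word address, i.e. the byte address shifted right
// by two.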
static SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  SDOperand Chain = Op.getOperand(0);

  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;

  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    unsigned CalleeVT = Callee.getValueType();
    SDOperand Zero = DAG.getConstant(0, PtrVT);
    SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with an
      // X-form address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumStackBytes, PtrVT),
                             DAG.getConstant(0, PtrVT),
                             InFlag);
  if (Op.Val->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}
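// Conventions assumed by the code above: SPURegisterInfo::getArgRegs()
// supplies the argument register file (r3 upward), scalar and vector results
// return in r3, and the expanded two-i32 case additionally uses r4, as the
// MVT::i32 branch of the result switch shows. NumStackBytes counts only the
// argument overflow area beyond the minimal [LR]/[SP] linkage frame.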
static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to
  // the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def; not a Constant node
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDOperand();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDOperand();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDOperand();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can
/// load constant vectors. Thus, we test to see if the upper and lower bytes
/// are the same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                     /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value and the value occupies only the upper 16 bits (suitable for ILHU),
/// and if so, return the constant.
SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                               MVT::ValueType ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDOperand();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
  }

  return SDOperand();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Use the full 64-bit value; truncating through unsigned (as the 32-bit
    // variant above does) would drop the upper half of the constant.
    return DAG.getConstant(CN->getValue(), MVT::i64);
  }

  return SDOperand();
}
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64-bit half?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx  %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

  return false;
}
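// Worked example of the packing above (illustrative): for a v8i16
// build_vector, e = 8, so elements 0-3 land in VectorBits[0] and elements
// 4-7 in VectorBits[1]; SlotNo places the earliest element of each half in
// the most significant 16-bit subfield of its uint64_t.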
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // Check that the top 16-bits are the same as the lower 16-bits,
          // ignoring undefs; if not, the best we have is an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // Check that the top 8-bits are the same as the lower 8-bits,
              // ignoring undefs; if so, we have an 8-bit splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          MVT::getSizeInBits(MVT::getVectorElementType(VT)),
                          SplatBits, SplatUndef, SplatSize))
    return SDOperand();   // Not a constant vector, not a splat.

  switch (VT) {
  default:
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDOperand T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
    SDOperand Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et al.
      SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1674 SmallVector<SDOperand, 16> ShufBytes;
1676 bool upper_special, lower_special;
1678 // NOTE: This code creates common-case shuffle masks that can be easily
1679 // detected as common expressions. It is not attempting to create highly
1680 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1682 // Detect if the upper or lower half is a special shuffle mask pattern:
1683 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1684 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1686 // Create lower vector if not a special pattern
1687 if (!lower_special) {
1688 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1689 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1690 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1691 LO32C, LO32C, LO32C, LO32C));
1694 // Create upper vector if not a special pattern
1695 if (!upper_special) {
1696 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1697 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1698 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1699 HI32C, HI32C, HI32C, HI32C));
1702 // If either upper or lower are special, then the two input operands are
1703 // the same (basically, one of them is a "don't care")
1708 if (lower_special && upper_special) {
1709 // Unhappy situation... both upper and lower are special, so punt with
1710 // a target constant:
1711 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1712 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1713 Zero, Zero);
1714 }
1716 for (int i = 0; i < 4; ++i) {
1717 unsigned val = 0;
1718 for (int j = 0; j < 4; ++j) {
1720 bool process_upper, process_lower;
1721 val <<= 8;
1722 process_upper = (upper_special && (i & 1) == 0);
1723 process_lower = (lower_special && (i & 1) == 1);
1725 if (process_upper || process_lower) {
1726 if ((process_upper && upper == 0)
1727 || (process_lower && lower == 0))
1728 val |= 0x80;
1729 else if ((process_upper && upper == 0xffffffff)
1730 || (process_lower && lower == 0xffffffff))
1731 val |= 0xc0;
1732 else if ((process_upper && upper == 0x80000000)
1733 || (process_lower && lower == 0x80000000))
1734 val |= (j == 0 ? 0xe0 : 0x80);
1735 } else
1736 val |= i * 4 + j + ((i & 1) * 16);
1737 }
1739 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1740 }
1742 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1743 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1744 &ShufBytes[0], ShufBytes.size()));
1745 }
1746 }
1747 }
1748 }
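// Editor's sketch of the SHUFB selector encoding assumed above (per the
// SPU ISA): a control byte of the form 10xxxxxx produces 0x00, 110xxxxx
// produces 0xFF, and 111xxxxx produces 0x80, which is why the special
// patterns use 0x80, 0xc0 and 0xe0; any other value selects that byte
// index from the concatenated 32-byte (HI32, LO32) input.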
1752 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1753 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1754 /// permutation vector, V3, is monotonically increasing with one "exception"
1755 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1756 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1757 /// In either case, the net result is going to eventually invoke SHUFB to
1758 /// permute/shuffle the bytes from V1 and V2.
1760 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1761 /// generate a control word for byte/halfword/word insertion. This takes care
1762 /// of a single element move from V2 into V1.
1764 /// SPUISD::SHUFB is eventually selected as the Cell's <i>shufb</i> instruction.
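// Illustrative example (editor's addition): for v4i32, the permutation
// mask (0, 1, 6, 3) is monotonic with a single exception element; only
// index 6 (V2's element 2) breaks the 0, 1, 2, 3 sequence, so the move
// can be done with a C*D-generated insertion mask instead of spilling a
// full shuffle mask to the constant pool.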
1765 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1766 SDOperand V1 = Op.getOperand(0);
1767 SDOperand V2 = Op.getOperand(1);
1768 SDOperand PermMask = Op.getOperand(2);
1770 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1772 // If we have a single element being moved from V1 to V2, this can be handled
1773 // using the C*[DX] compute mask instructions, but the vector elements have
1774 // to be monotonically increasing with one exception element.
1775 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1776 unsigned EltsFromV2 = 0;
1777 unsigned V2Elt = 0;
1778 unsigned V2EltIdx0 = 0;
1779 unsigned CurrElt = 0;
1780 bool monotonic = true;
1781 if (EltVT == MVT::i8)
1782 V2EltIdx0 = 16;
1783 else if (EltVT == MVT::i16)
1784 V2EltIdx0 = 8;
1785 else if (EltVT == MVT::i32)
1786 V2EltIdx0 = 4;
1787 else
1788 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1790 for (unsigned i = 0, e = PermMask.getNumOperands();
1791 EltsFromV2 <= 1 && monotonic && i != e;
1792 ++i) {
1793 unsigned SrcElt;
1794 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1795 SrcElt = 0;
1796 else
1797 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1799 if (SrcElt >= V2EltIdx0) {
1800 ++EltsFromV2;
1801 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1802 } else if (CurrElt != SrcElt) {
1803 monotonic = false;
1804 }
1806 ++CurrElt;
1807 }
1809 if (EltsFromV2 == 1 && monotonic) {
1810 // Compute mask and shuffle
1811 MachineFunction &MF = DAG.getMachineFunction();
1812 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1813 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1814 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1815 // Initialize temporary register to 0
1816 SDOperand InitTempReg =
1817 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1818 // Copy register's contents as index in INSERT_MASK:
1819 SDOperand ShufMaskOp =
1820 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1821 DAG.getTargetConstant(V2Elt, MVT::i32),
1822 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1823 // Use shuffle mask in SHUFB synthetic instruction:
1824 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1825 }
1826 // Convert the VECTOR_SHUFFLE mask's input element units to the actual bytes.
1827 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1829 SmallVector<SDOperand, 16> ResultMask;
1830 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1831 unsigned SrcElt;
1832 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1833 SrcElt = 0;
1834 else
1835 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1837 for (unsigned j = 0; j < BytesPerElement; ++j) {
1838 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1839 MVT::i8));
1840 }
1841 }
1843 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1844 &ResultMask[0], ResultMask.size());
1845 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1846 }
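// Worked example (editor's addition): for a v4i32 shuffle mask of
// (4, 1, 2, 3), BytesPerElement == 4, so ResultMask expands to the
// byte selectors 16..19, 4..7, 8..11, 12..15 over the concatenated
// (V1, V2) byte space that shufb sees.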
1849 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1850 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1852 if (Op0.Val->getOpcode() == ISD::Constant) {
1853 // For a constant, build the appropriate constant vector, which will
1854 // eventually simplify to a vector register load.
1856 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1857 SmallVector<SDOperand, 16> ConstVecValues;
1858 MVT::ValueType VT;
1859 size_t n_copies;
1861 // Create a constant vector:
1862 switch (Op.getValueType()) {
1863 default: assert(0 && "Unexpected constant value type in "
1864 "LowerSCALAR_TO_VECTOR");
1865 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1866 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1867 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1868 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1869 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1870 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1873 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1874 for (size_t j = 0; j < n_copies; ++j)
1875 ConstVecValues.push_back(CValue);
1877 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1878 &ConstVecValues[0], ConstVecValues.size());
1880 // Otherwise, copy the value from one register to another:
1881 switch (Op0.getValueType()) {
1882 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1883 case MVT::i8:
1884 case MVT::i16:
1885 case MVT::i32:
1886 case MVT::i64:
1887 case MVT::f32:
1888 case MVT::f64:
1889 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1890 }
1891 }
1896 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1897 switch (Op.getValueType()) {
1898 case MVT::v4i32: {
1899 SDOperand rA = Op.getOperand(0);
1900 SDOperand rB = Op.getOperand(1);
1901 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1902 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1903 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1904 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1906 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1907 }
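// Editor's sketch of the identity used above (assuming MPYU forms the
// product of the low 16-bit halves and MPYH forms (hi16(a) * lo16(b))
// shifted left 16):
//
//   a * b mod 2^32 == lo(a)*lo(b)
//                   + ((hi(a)*lo(b)) << 16)
//                   + ((hi(b)*lo(a)) << 16)
//
// The hi(a)*hi(b) term shifts entirely out of the low 32 bits, so two
// MPYH results plus one MPYU result sum to the full 32-bit product.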
1910 // Multiply two v8i16 vectors (pipeline friendly version):
1911 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1912 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1913 // c) Use SELB to select upper and lower halves from the intermediate results
1915 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1916 // dual-issue. This code does manage to do this, even if it's a little on
1917 // the convoluted side.
1918 case MVT::v8i16: {
1919 MachineFunction &MF = DAG.getMachineFunction();
1920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1921 SDOperand Chain = Op.getOperand(0);
1922 SDOperand rA = Op.getOperand(0);
1923 SDOperand rB = Op.getOperand(1);
1924 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1925 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1927 SDOperand FSMBOp =
1928 DAG.getCopyToReg(Chain, FSMBIreg,
1929 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1930 DAG.getConstant(0xcccc, MVT::i16)));
1932 SDOperand HHProd =
1933 DAG.getCopyToReg(FSMBOp, HiProdReg,
1934 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1936 SDOperand HHProd_v4i32 =
1937 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1938 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1940 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1941 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1942 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1943 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1944 HHProd_v4i32,
1945 DAG.getConstant(16, MVT::i16))),
1946 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1947 }
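// Editor's note (illustrative): SELECT_MASK/FSMBI expands each bit of
// 0xcccc == 1100110011001100b into one byte of a v16i8 select mask, so
// bytes 0-1, 4-5, 8-9 and 12-13 of the SELB result take the shifted
// high-half products while the remaining bytes keep the low-half MPY
// results.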
1948 case MVT::v16i8: {
1949 // This M00sE is N@stI! (apologies to Monty Python)
1951 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1952 // is to break it all apart, sign extend, and reassemble the various
1953 // intermediate products.
1955 SDOperand rA = Op.getOperand(0);
1956 SDOperand rB = Op.getOperand(1);
1957 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1958 SDOperand c16 = DAG.getConstant(16, MVT::i32);
1960 SDOperand LLProd =
1961 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1962 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1963 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1965 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1967 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1969 SDOperand LHProd =
1970 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1971 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1973 SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1974 DAG.getConstant(0x2222, MVT::i16));
1976 SDOperand LoProdParts =
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1978 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1979 LLProd, LHProd, FSMBmask));
1981 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1983 SDOperand LoProd =
1984 DAG.getNode(ISD::AND, MVT::v4i32,
1985 LoProdParts,
1986 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1987 LoProdMask, LoProdMask,
1988 LoProdMask, LoProdMask));
1990 SDOperand rAH =
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1994 SDOperand rBH =
1995 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1996 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1998 SDOperand HLProd =
1999 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2000 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2001 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2003 SDOperand HHProd_1 =
2004 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2005 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2006 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2007 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2008 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2010 SDOperand HHProd =
2011 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2012 HLProd,
2013 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2014 FSMBmask);
2016 SDOperand HiProd =
2017 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2019 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2020 DAG.getNode(ISD::OR, MVT::v4i32,
2021 LoProd, HiProd));
2022 }
2024 default:
2025 cerr << "CellSPU: Unknown vector multiplication, got "
2026 << MVT::getValueTypeString(Op.getValueType())
2027 << "\n";
2028 abort();
2029 /*NOTREACHED*/
2030 }
2032 return SDOperand();
2033 }
2035 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2036 MachineFunction &MF = DAG.getMachineFunction();
2037 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2039 SDOperand A = Op.getOperand(0);
2040 SDOperand B = Op.getOperand(1);
2041 unsigned VT = Op.getValueType();
2043 unsigned VRegBR, VRegC;
2045 if (VT == MVT::f32) {
2046 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2047 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2048 } else {
2049 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2050 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2051 }
2052 // TODO: make sure we're feeding FPInterp the right arguments
2053 // Right now: fi B, frest(B)
2056 // (Floating Interpolate (FP Reciprocal Estimate B))
2057 SDOperand BRcpl =
2058 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2059 DAG.getNode(SPUISD::FPInterp, VT, B,
2060 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2062 // Computes A * BRcpl and stores in a temporary register
2063 SDOperand AxBRcpl =
2064 DAG.getCopyToReg(BRcpl, VRegC,
2065 DAG.getNode(ISD::FMUL, VT, A,
2066 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2067 // What does the Chain variable do? It's magic!
2068 // TODO: set Chain = Op(0).getEntryNode()
2070 return DAG.getNode(ISD::FADD, VT,
2071 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2072 DAG.getNode(ISD::FMUL, VT,
2073 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2074 DAG.getNode(ISD::FSUB, VT, A,
2075 DAG.getNode(ISD::FMUL, VT, B,
2076 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2077 }
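// Editor's sketch of the math above (one Newton-Raphson refinement of
// the hardware reciprocal estimate): with y0 = fi(B, frest(B)) ~= 1/B
// and q0 = A * y0, the value returned is
//
//   q0 + y0 * (A - B * q0)
//
// which cancels the first-order error in y0, roughly doubling the
// number of correct bits in the quotient A/B.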
2079 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2080 unsigned VT = Op.getValueType();
2081 SDOperand N = Op.getOperand(0);
2082 SDOperand Elt = Op.getOperand(1);
2083 SDOperand ShufMask[16];
2084 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2086 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2088 int EltNo = (int) C->getValue();
2091 if (VT == MVT::i8 && EltNo >= 16)
2092 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2093 else if (VT == MVT::i16 && EltNo >= 8)
2094 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2095 else if (VT == MVT::i32 && EltNo >= 4)
2096 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2097 else if (VT == MVT::i64 && EltNo >= 2)
2098 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2100 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2101 // i32 and i64: Element 0 is the preferred slot
2102 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2105 // Need to generate shuffle mask and extract:
2106 int prefslot_begin = -1, prefslot_end = -1;
2107 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
2109 switch (VT) {
2110 case MVT::i8: {
2111 prefslot_begin = prefslot_end = 3;
2112 break;
2113 }
2114 case MVT::i16: {
2115 prefslot_begin = 2; prefslot_end = 3;
2116 break;
2117 }
2118 case MVT::i32: {
2119 prefslot_begin = 0; prefslot_end = 3;
2120 break;
2121 }
2122 case MVT::i64: {
2123 prefslot_begin = 0; prefslot_end = 7;
2124 break;
2125 }
2126 }
2128 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2129 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2131 for (int i = 0; i < 16; ++i) {
2132 // zero fill upper part of preferred slot, don't care about the
2133 // rest of the mask bytes.
2134 unsigned int mask_val;
2136 if (i <= prefslot_end) {
2137 mask_val =
2138 ((i < prefslot_begin)
2139 ? 0x80
2140 : elt_byte + (i - prefslot_begin));
2142 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2143 } else
2144 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2145 }
2147 SDOperand ShufMaskVec =
2148 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2149 &ShufMask[0],
2150 sizeof(ShufMask) / sizeof(ShufMask[0]));
2152 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2153 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2154 N, N, ShufMaskVec));
2155 }
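// Worked example (editor's addition): extracting element 5 of a v8i16
// vector gives elt_byte == 10 and preferred slot bytes 2..3, so the
// mask routes source bytes 10..11 into result bytes 2..3 (the i16
// preferred slot) and zero-fills bytes 0..1 through the 0x80 selectors.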
2158 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2159 SDOperand VecOp = Op.getOperand(0);
2160 SDOperand ValOp = Op.getOperand(1);
2161 SDOperand IdxOp = Op.getOperand(2);
2162 MVT::ValueType VT = Op.getValueType();
2164 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(IdxOp);
2165 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2167 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2168 // Use $2 because it's always 16-byte aligned and it's available:
2169 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2171 return
2172 DAG.getNode(SPUISD::SHUFB, VT,
2173 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2174 VecOp,
2175 DAG.getNode(SPUISD::INSERT_MASK, VT,
2176 DAG.getNode(ISD::ADD, PtrVT,
2177 PtrBase,
2178 DAG.getConstant(CN->getValue(),
2179 PtrVT))));
2180 }
2184 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2186 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2188 assert(Op.getValueType() == MVT::i8);
2189 switch (Opc) {
2190 default:
2191 assert(0 && "Unhandled i8 math operator");
2192 /*NOTREACHED*/
2193 break;
2194 case ISD::SUB: {
2195 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2196 // the result:
2197 SDOperand N1 = Op.getOperand(1);
2198 N0 = (N0.getOpcode() != ISD::Constant
2199 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2200 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2201 N1 = (N1.getOpcode() != ISD::Constant
2202 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2203 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2204 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2205 DAG.getNode(Opc, MVT::i16, N0, N1));
2206 }
2207 case ISD::ROTR:
2208 case ISD::ROTL: {
2209 SDOperand N1 = Op.getOperand(1);
2210 unsigned N1Opc;
2211 N0 = (N0.getOpcode() != ISD::Constant
2212 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2213 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2214 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2215 N1 = (N1.getOpcode() != ISD::Constant
2216 ? DAG.getNode(N1Opc, MVT::i16, N1)
2217 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2218 SDOperand ExpandArg =
2219 DAG.getNode(ISD::OR, MVT::i16, N0,
2220 DAG.getNode(ISD::SHL, MVT::i16,
2221 N0, DAG.getConstant(8, MVT::i16)));
2222 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2223 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2224 }
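// Worked example (editor's addition) of the byte-duplication trick used
// for 8-bit rotates: to rotate 0xB2 left by 4, ExpandArg duplicates the
// byte into 0xB2B2; a 16-bit rotate left by 4 gives 0x2B2B, and the
// final TRUNCATE keeps 0x2B, which is exactly rotl8(0xB2, 4).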
2225 case ISD::SRL:
2226 case ISD::SHL: {
2227 SDOperand N1 = Op.getOperand(1);
2228 unsigned N1Opc;
2229 N0 = (N0.getOpcode() != ISD::Constant
2230 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2231 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2232 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2233 N1 = (N1.getOpcode() != ISD::Constant
2234 ? DAG.getNode(N1Opc, MVT::i16, N1)
2235 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2236 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2237 DAG.getNode(Opc, MVT::i16, N0, N1));
2238 }
2239 case ISD::SRA: {
2240 SDOperand N1 = Op.getOperand(1);
2241 unsigned N1Opc;
2242 N0 = (N0.getOpcode() != ISD::Constant
2243 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2244 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2245 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2246 N1 = (N1.getOpcode() != ISD::Constant
2247 ? DAG.getNode(N1Opc, MVT::i16, N1)
2248 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2249 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2250 DAG.getNode(Opc, MVT::i16, N0, N1));
2251 }
2252 case ISD::MUL: {
2253 SDOperand N1 = Op.getOperand(1);
2254 unsigned N1Opc;
2255 N0 = (N0.getOpcode() != ISD::Constant
2256 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2257 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2258 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2259 N1 = (N1.getOpcode() != ISD::Constant
2260 ? DAG.getNode(N1Opc, MVT::i16, N1)
2261 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2262 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2263 DAG.getNode(Opc, MVT::i16, N0, N1));
2264 }
2265 }
2267 return SDOperand();
2268 }
2271 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2273 MVT::ValueType VT = Op.getValueType();
2274 MVT::ValueType VecVT =
2275 MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2277 SDOperand Op0 = Op.getOperand(0);
2279 switch (Opc) {
2280 case ISD::ZERO_EXTEND:
2281 case ISD::SIGN_EXTEND:
2282 case ISD::ANY_EXTEND: {
2283 MVT::ValueType Op0VT = Op0.getValueType();
2284 MVT::ValueType Op0VecVT =
2285 MVT::getVectorType(Op0VT, (128 / MVT::getSizeInBits(Op0VT)));
2287 assert(Op0VT == MVT::i32
2288 && "CellSPU: Zero/sign extending something other than i32");
2289 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2291 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2292 ? SPUISD::ROTBYTES_RIGHT_S
2293 : SPUISD::ROTQUAD_RZ_BYTES);
2294 SDOperand PromoteScalar =
2295 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2297 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2298 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2299 DAG.getNode(NewOpc, Op0VecVT,
2300 PromoteScalar,
2301 DAG.getConstant(4, MVT::i32))));
2302 }
2304 case ISD::ADD: {
2305 // Turn operands into vectors to satisfy type checking (shufb works on
2306 // vectors):
2307 SDOperand Op0 =
2308 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2309 SDOperand Op1 =
2310 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2311 SmallVector<SDOperand, 16> ShufBytes;
2313 // Create the shuffle mask for "rotating" the carry up one register slot
2314 // once the carry is generated.
2315 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2316 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2317 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2318 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2320 SDOperand CarryGen =
2321 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2322 SDOperand ShiftedCarry =
2323 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2324 CarryGen, CarryGen,
2325 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2326 &ShufBytes[0], ShufBytes.size()));
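// Editor's note (illustrative): CARRY_GENERATE computes a carry bit per
// 32-bit word, but for a 64-bit add the carry out of the low word must
// feed the high word. The 0x04050607 selector moves bytes 4..7 into
// bytes 0..3 (and 12..15 into 8..11 for the second lane) while the 0x80
// selectors zero everything else, putting each carry in the slot where
// ADD_EXTENDED expects it.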
2328 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2329 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2330 Op0, Op1, ShiftedCarry));
2331 }
2333 case ISD::SUB: {
2334 // Turn operands into vectors to satisfy type checking (shufb works on
2335 // vectors):
2336 SDOperand Op0 =
2337 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2338 SDOperand Op1 =
2339 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2340 SmallVector<SDOperand, 16> ShufBytes;
2342 // Create the shuffle mask for "rotating" the borrow up one register slot
2343 // once the borrow is generated.
2344 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2345 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2346 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2347 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2349 SDOperand BorrowGen =
2350 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2351 SDOperand ShiftedBorrow =
2352 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2353 BorrowGen, BorrowGen,
2354 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2355 &ShufBytes[0], ShufBytes.size()));
2357 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2358 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2359 Op0, Op1, ShiftedBorrow));
2360 }
2362 case ISD::SHL: {
2363 SDOperand ShiftAmt = Op.getOperand(1);
2364 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2365 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2366 SDOperand MaskLower =
2367 DAG.getNode(SPUISD::SELB, VecVT,
2368 Op0Vec,
2369 DAG.getConstant(0, VecVT),
2370 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2371 DAG.getConstant(0xff00ULL, MVT::i16)));
2372 SDOperand ShiftAmtBytes =
2373 DAG.getNode(ISD::SRL, ShiftAmtVT,
2374 ShiftAmt,
2375 DAG.getConstant(3, ShiftAmtVT));
2376 SDOperand ShiftAmtBits =
2377 DAG.getNode(ISD::AND, ShiftAmtVT,
2378 ShiftAmt,
2379 DAG.getConstant(7, ShiftAmtVT));
2381 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2382 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2383 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2384 MaskLower, ShiftAmtBytes),
2385 ShiftAmtBits));
2386 }
2388 case ISD::SRL: {
2389 unsigned VT = unsigned(Op.getValueType());
2390 SDOperand ShiftAmt = Op.getOperand(1);
2391 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2392 SDOperand ShiftAmtBytes =
2393 DAG.getNode(ISD::SRL, ShiftAmtVT,
2395 DAG.getConstant(3, ShiftAmtVT));
2396 SDOperand ShiftAmtBits =
2397 DAG.getNode(ISD::AND, ShiftAmtVT,
2399 DAG.getConstant(7, ShiftAmtVT));
2401 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2402 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2403 Op0, ShiftAmtBytes),
2404 ShiftAmtBits);
2405 }
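// Editor's sketch of the decomposition used by the SHL and SRL cases:
// a shift amount n is split as (n >> 3) whole bytes plus (n & 7) bits,
// e.g. n == 35 becomes a 4-byte quadword shift followed by a 3-bit
// shift, matching the byte-granular and bit-granular quadword shifters.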
2407 case ISD::SRA: {
2408 // Promote Op0 to vector
2409 SDOperand Op0 =
2410 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2411 SDOperand ShiftAmt = Op.getOperand(1);
2412 unsigned ShiftVT = ShiftAmt.getValueType();
2414 // Negate variable shift amounts
2415 if (!isa<ConstantSDNode>(ShiftAmt)) {
2416 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2417 DAG.getConstant(0, ShiftVT), ShiftAmt);
2418 }
2420 SDOperand UpperHalfSign =
2421 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2422 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2423 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2424 Op0, DAG.getConstant(31, MVT::i32))));
2425 SDOperand UpperHalfSignMask =
2426 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2427 SDOperand UpperLowerMask =
2428 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2429 DAG.getConstant(0xff00, MVT::i16));
2430 SDOperand UpperLowerSelect =
2431 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2432 UpperHalfSignMask, Op0, UpperLowerMask);
2433 SDOperand RotateLeftBytes =
2434 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2435 UpperLowerSelect, ShiftAmt);
2436 SDOperand RotateLeftBits =
2437 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2438 RotateLeftBytes, ShiftAmt);
2440 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2441 RotateLeftBits);
2442 }
2443 }
2445 return SDOperand();
2446 }
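// Editor's note (illustrative): variable shift amounts are negated
// above because a quadword rotate left by (0 - n) is equivalent to a
// rotate right by n; the SELB first pins the upper half to a replicated
// sign word so the bytes rotated around reproduce the arithmetic
// (sign-filling) result.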
2448 //! Lower byte immediate operations for v16i8 vectors:
2449 static SDOperand
2450 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2451 SDOperand ConstVec;
2452 SDOperand Arg;
2453 MVT::ValueType VT = Op.getValueType();
2455 ConstVec = Op.getOperand(0);
2456 Arg = Op.getOperand(1);
2457 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2458 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2459 ConstVec = ConstVec.getOperand(0);
2460 } else {
2461 ConstVec = Op.getOperand(1);
2462 Arg = Op.getOperand(0);
2463 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2464 ConstVec = ConstVec.getOperand(0);
2465 }
2466 }
2467 }
2469 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2470 uint64_t VectorBits[2];
2471 uint64_t UndefBits[2];
2472 uint64_t SplatBits, SplatUndef;
2473 int SplatSize;
2475 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2476 && isConstantSplat(VectorBits, UndefBits,
2477 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2478 SplatBits, SplatUndef, SplatSize)) {
2479 SDOperand tcVec[16];
2480 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2481 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2483 // Turn the BUILD_VECTOR into a set of target constants:
2484 for (size_t i = 0; i < tcVecSize; ++i)
2485 tcVec[i] = tc;
2487 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2488 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2489 }
2490 }
2492 return SDOperand();
2493 }
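// Illustrative example (editor's addition): (or v16i8 x, splat 0x0f)
// reaches this point with a BUILD_VECTOR splat operand; rewriting the
// splat as sixteen TargetConstant bytes lets an or-byte-immediate
// pattern (ORBI) match directly instead of loading the mask from the
// constant pool.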
2495 //! Lower i32 multiplication
2496 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
2497 unsigned Opc) {
2498 switch (VT) {
2499 default:
2500 cerr << "CellSPU: Unknown LowerMUL value type, got "
2501 << MVT::getValueTypeString(Op.getValueType())
2502 << "\n";
2503 abort();
2504 /*NOTREACHED*/
2506 case MVT::i32: {
2507 SDOperand rA = Op.getOperand(0);
2508 SDOperand rB = Op.getOperand(1);
2510 return DAG.getNode(ISD::ADD, MVT::i32,
2511 DAG.getNode(ISD::ADD, MVT::i32,
2512 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2513 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2514 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2515 }
2516 }
2518 return SDOperand();
2519 }
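// (Editor's note: this is the same lo/hi cross-product identity
// sketched for the v4i32 case in LowerVectorMUL above, applied to a
// single i32 value.)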
2521 //! Custom lowering for CTPOP (count population)
2522 /*!
2523 Custom lowering code that counts the number of ones in the input
2524 operand. SPU has such an instruction, but it counts the number of
2525 ones per byte, which then have to be accumulated.
2526 */
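// Worked example (editor's addition): for an i32 input of 0xF0F0F0F0,
// CNTB yields the per-byte counts 0x04040404; the shift/add ladder in
// the i32 case below folds halfwords (>> 16, add) and then bytes
// (>> 8, add), and the final mask with 0xff leaves the expected
// population count of 16.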
2527 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2528 unsigned VT = Op.getValueType();
2529 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2531 switch (VT) {
2532 case MVT::i8: {
2533 SDOperand N = Op.getOperand(0);
2534 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2536 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2537 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2539 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2540 }
2542 case MVT::i16: {
2543 MachineFunction &MF = DAG.getMachineFunction();
2544 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2546 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2548 SDOperand N = Op.getOperand(0);
2549 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2550 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2551 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2553 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2554 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2556 // CNTB_result becomes the chain to which all of the virtual registers
2557 // CNTB_reg, SUM1_reg become associated:
2558 SDOperand CNTB_result =
2559 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2561 SDOperand CNTB_rescopy =
2562 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2564 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2566 return DAG.getNode(ISD::AND, MVT::i16,
2567 DAG.getNode(ISD::ADD, MVT::i16,
2568 DAG.getNode(ISD::SRL, MVT::i16,
2569 Tmp1, Shift1),
2570 Tmp1),
2571 Mask0);
2572 }
2574 case MVT::i32: {
2575 MachineFunction &MF = DAG.getMachineFunction();
2576 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2578 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2579 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2581 SDOperand N = Op.getOperand(0);
2582 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2583 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2584 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2585 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2587 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2588 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2590 // CNTB_result becomes the chain to which all of the virtual registers
2591 // CNTB_reg, SUM1_reg become associated:
2592 SDOperand CNTB_result =
2593 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2595 SDOperand CNTB_rescopy =
2596 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2598 SDOperand Comp1 =
2599 DAG.getNode(ISD::SRL, MVT::i32,
2600 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2602 SDOperand Sum1 =
2603 DAG.getNode(ISD::ADD, MVT::i32,
2604 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2606 SDOperand Sum1_rescopy =
2607 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2609 SDOperand Comp2 =
2610 DAG.getNode(ISD::SRL, MVT::i32,
2611 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2612 Shift2);
2613 SDOperand Sum2 =
2614 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2615 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2617 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2618 }
2619 }
2621 return SDOperand();
2622 }
2627 /// LowerOperation - Provide custom lowering hooks for some operations.
2629 SDOperand
2630 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2631 {
2632 unsigned Opc = (unsigned) Op.getOpcode();
2633 unsigned VT = (unsigned) Op.getValueType();
2635 switch (Opc) {
2636 default: {
2637 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2638 cerr << "Op.getOpcode() = " << Opc << "\n";
2639 cerr << "*Op.Val:\n";
2640 Op.Val->dump();
2642 abort();
2643 }
2645 case ISD::LOAD:
2646 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2647 case ISD::STORE:
2648 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2649 case ISD::ConstantPool:
2650 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2651 case ISD::GlobalAddress:
2652 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2653 case ISD::JumpTable:
2654 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2655 case ISD::Constant:
2656 return LowerConstant(Op, DAG);
2657 case ISD::ConstantFP:
2658 return LowerConstantFP(Op, DAG);
2659 case ISD::BRCOND:
2660 return LowerBRCOND(Op, DAG);
2661 case ISD::FORMAL_ARGUMENTS:
2662 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2663 case ISD::CALL:
2664 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2665 case ISD::RET:
2666 return LowerRET(Op, DAG, getTargetMachine());
2669 // i8, i64 math ops:
2670 case ISD::ZERO_EXTEND:
2671 case ISD::SIGN_EXTEND:
2672 case ISD::ANY_EXTEND:
2673 case ISD::ADD:
2674 case ISD::SUB:
2675 case ISD::ROTR:
2676 case ISD::ROTL:
2677 case ISD::SRL:
2678 case ISD::SHL:
2679 case ISD::SRA:
2680 if (VT == MVT::i8)
2681 return LowerI8Math(Op, DAG, Opc);
2682 else if (VT == MVT::i64)
2683 return LowerI64Math(Op, DAG, Opc);
2684 break;
2687 // Vector-related lowering.
2688 case ISD::BUILD_VECTOR:
2689 return LowerBUILD_VECTOR(Op, DAG);
2690 case ISD::SCALAR_TO_VECTOR:
2691 return LowerSCALAR_TO_VECTOR(Op, DAG);
2692 case ISD::VECTOR_SHUFFLE:
2693 return LowerVECTOR_SHUFFLE(Op, DAG);
2694 case ISD::EXTRACT_VECTOR_ELT:
2695 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2696 case ISD::INSERT_VECTOR_ELT:
2697 return LowerINSERT_VECTOR_ELT(Op, DAG);
2699 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2700 case ISD::AND:
2701 case ISD::OR:
2702 case ISD::XOR:
2703 return LowerByteImmed(Op, DAG);
2705 // Vector and i8 multiply:
2706 case ISD::MUL:
2707 if (MVT::isVector(VT))
2708 return LowerVectorMUL(Op, DAG);
2709 else if (VT == MVT::i8)
2710 return LowerI8Math(Op, DAG, Opc);
2711 else
2712 return LowerMUL(Op, DAG, VT, Opc);
2714 case ISD::FDIV:
2715 if (VT == MVT::f32 || VT == MVT::v4f32)
2716 return LowerFDIVf32(Op, DAG);
2717 // else if (Op.getValueType() == MVT::f64)
2718 // return LowerFDIVf64(Op, DAG);
2720 assert(0 && "Calling FDIV on unsupported MVT");
2722 case ISD::CTPOP:
2723 return LowerCTPOP(Op, DAG);
2724 }
2726 return SDOperand();
2727 }
2729 //===----------------------------------------------------------------------===//
2730 // Target Optimization Hooks
2731 //===----------------------------------------------------------------------===//
2733 SDOperand
2734 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2735 {
2737 TargetMachine &TM = getTargetMachine();
2739 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2740 SelectionDAG &DAG = DCI.DAG;
2741 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2742 SDOperand Result; // Initially, NULL result
2744 switch (N->getOpcode()) {
2745 default: break;
2746 case ISD::ADD: {
2747 SDOperand Op1 = N->getOperand(1);
2749 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2750 SDOperand Op01 = Op0.getOperand(1);
2751 if (Op01.getOpcode() == ISD::Constant
2752 || Op01.getOpcode() == ISD::TargetConstant) {
2753 // (add <const>, (SPUindirect <arg>, <const>)) ->
2754 // (SPUindirect <arg>, <const + const>)
2755 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2756 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2757 SDOperand combinedConst =
2758 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2759 Op0.getValueType());
2761 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2762 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2763 DEBUG(cerr << "With: (SPUindirect <arg>, "
2764 << CN0->getValue() + CN1->getValue() << ")\n");
2765 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2766 Op0.getOperand(0), combinedConst);
2768 } else if (isa<ConstantSDNode>(Op0)
2769 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2770 SDOperand Op11 = Op1.getOperand(1);
2771 if (Op11.getOpcode() == ISD::Constant
2772 || Op11.getOpcode() == ISD::TargetConstant) {
2773 // (add (SPUindirect <arg>, <const>), <const>) ->
2774 // (SPUindirect <arg>, <const + const>)
2775 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2776 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2777 SDOperand combinedConst =
2778 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2779 Op0.getValueType());
2781 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2782 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2783 DEBUG(cerr << "With: (SPUindirect <arg>, "
2784 << CN0->getValue() + CN1->getValue() << ")\n");
2786 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2787 Op1.getOperand(0), combinedConst);
2788 }
2789 }
2790 break;
2791 }
2792 case ISD::SIGN_EXTEND:
2793 case ISD::ZERO_EXTEND:
2794 case ISD::ANY_EXTEND: {
2795 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2796 N->getValueType(0) == Op0.getValueType()) {
2797 // (any_extend (SPUextract_elt0 <arg>)) ->
2798 // (SPUextract_elt0 <arg>)
2799 // Types must match, however...
2800 DEBUG(cerr << "Replace: ");
2801 DEBUG(N->dump(&DAG));
2802 DEBUG(cerr << "\nWith: ");
2803 DEBUG(Op0.Val->dump(&DAG));
2804 DEBUG(cerr << "\n");
2806 return Op0;
2807 }
2808 break;
2809 }
2810 case SPUISD::IndirectAddr: {
2811 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2812 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2813 if (CN->getValue() == 0) {
2814 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2815 // (SPUaform <addr>, 0)
2817 DEBUG(cerr << "Replace: ");
2818 DEBUG(N->dump(&DAG));
2819 DEBUG(cerr << "\nWith: ");
2820 DEBUG(Op0.Val->dump(&DAG));
2821 DEBUG(cerr << "\n");
2823 return Op0;
2824 }
2825 }
2826 break;
2827 }
2828 case SPUISD::SHLQUAD_L_BITS:
2829 case SPUISD::SHLQUAD_L_BYTES:
2830 case SPUISD::VEC_SHL:
2831 case SPUISD::VEC_SRL:
2832 case SPUISD::VEC_SRA:
2833 case SPUISD::ROTQUAD_RZ_BYTES:
2834 case SPUISD::ROTQUAD_RZ_BITS: {
2835 SDOperand Op1 = N->getOperand(1);
2837 if (isa<ConstantSDNode>(Op1)) {
2838 // Kill degenerate vector shifts:
2839 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2841 if (CN->getValue() == 0) {
2842 Result = Op0;
2843 }
2844 }
2845 break;
2846 }
2847 case SPUISD::PROMOTE_SCALAR: {
2848 switch (Op0.getOpcode()) {
2849 default:
2850 break;
2851 case ISD::ANY_EXTEND:
2852 case ISD::ZERO_EXTEND:
2853 case ISD::SIGN_EXTEND: {
2854 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2855 // <arg>
2856 // but only if the SPUpromote_scalar and <arg> types match.
2857 SDOperand Op00 = Op0.getOperand(0);
2858 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2859 SDOperand Op000 = Op00.getOperand(0);
2860 if (Op000.getValueType() == N->getValueType(0)) {
2861 Result = Op000;
2862 }
2863 }
2864 break;
2865 }
2866 case SPUISD::EXTRACT_ELT0: {
2867 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2868 // <arg>
2869 Result = Op0.getOperand(0);
2870 break;
2871 }
2872 }
2873 break;
2874 }
2875 }
2876 // Otherwise, return unchanged.
2878 if (Result.Val) {
2879 DEBUG(cerr << "\nReplace.SPU: ");
2880 DEBUG(N->dump(&DAG));
2881 DEBUG(cerr << "\nWith: ");
2882 DEBUG(Result.Val->dump(&DAG));
2883 DEBUG(cerr << "\n");
2884 }
2886 return Result;
2887 }
2890 //===----------------------------------------------------------------------===//
2891 // Inline Assembly Support
2892 //===----------------------------------------------------------------------===//
2894 /// getConstraintType - Given a constraint letter, return the type of
2895 /// constraint it is for this target.
2896 SPUTargetLowering::ConstraintType
2897 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2898 if (ConstraintLetter.size() == 1) {
2899 switch (ConstraintLetter[0]) {
2900 default: break;
2901 case 'b':
2902 case 'r':
2903 case 'f':
2904 case 'v':
2905 case 'y':
2906 return C_RegisterClass;
2907 }
2908 }
2909 return TargetLowering::getConstraintType(ConstraintLetter);
2912 std::pair<unsigned, const TargetRegisterClass*>
2913 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2914 MVT::ValueType VT) const
2916 if (Constraint.size() == 1) {
2917 // GCC RS6000 Constraint Letters
2918 switch (Constraint[0]) {
2919 case 'b':   // R1-R31
2920 case 'r':   // R0-R31
2921 if (VT == MVT::i64)
2922 return std::make_pair(0U, SPU::R64CRegisterClass);
2923 return std::make_pair(0U, SPU::R32CRegisterClass);
2924 case 'f':   // floating point registers
2925 if (VT == MVT::f32)
2926 return std::make_pair(0U, SPU::R32FPRegisterClass);
2927 else if (VT == MVT::f64)
2928 return std::make_pair(0U, SPU::R64FPRegisterClass);
2929 break;
2930 case 'v':   // vector registers
2931 return std::make_pair(0U, SPU::GPRCRegisterClass);
2932 }
2933 }
2935 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2938 //! Compute used/known bits for a SPU operand
2939 void
2940 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2941 const APInt &Mask,
2942 APInt &KnownZero,
2943 APInt &KnownOne,
2944 const SelectionDAG &DAG,
2945 unsigned Depth) const {
2947 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2950 switch (Op.getOpcode()) {
2951 default:
2952 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2953 break;
2962 case SPUISD::PROMOTE_SCALAR: {
2963 SDOperand Op0 = Op.getOperand(0);
2964 MVT::ValueType Op0VT = Op0.getValueType();
2965 unsigned Op0VTBits = MVT::getSizeInBits(Op0VT);
2966 uint64_t InMask = MVT::getIntVTBitMask(Op0VT);
2967 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2968 KnownOne |= APInt(Op0VTBits, InMask, false);
2969 break;
2970 }
2972 case SPUISD::LDRESULT:
2973 case SPUISD::EXTRACT_ELT0:
2974 case SPUISD::EXTRACT_ELT0_CHAINED: {
2975 MVT::ValueType OpVT = Op.getValueType();
2976 unsigned OpVTBits = MVT::getSizeInBits(OpVT);
2977 uint64_t InMask = MVT::getIntVTBitMask(OpVT);
2978 KnownZero |= APInt(OpVTBits, ~InMask, false);
2979 KnownOne |= APInt(OpVTBits, InMask, false);
2980 break;
2981 }
2984 case SPUISD::EXTRACT_I1_ZEXT:
2985 case SPUISD::EXTRACT_I1_SEXT:
2986 case SPUISD::EXTRACT_I8_ZEXT:
2987 case SPUISD::EXTRACT_I8_SEXT:
2992 case SPUISD::SHLQUAD_L_BITS:
2993 case SPUISD::SHLQUAD_L_BYTES:
2994 case SPUISD::VEC_SHL:
2995 case SPUISD::VEC_SRL:
2996 case SPUISD::VEC_SRA:
2997 case SPUISD::VEC_ROTL:
2998 case SPUISD::VEC_ROTR:
2999 case SPUISD::ROTQUAD_RZ_BYTES:
3000 case SPUISD::ROTQUAD_RZ_BITS:
3001 case SPUISD::ROTBYTES_RIGHT_S:
3002 case SPUISD::ROTBYTES_LEFT:
3003 case SPUISD::ROTBYTES_LEFT_CHAINED:
3004 case SPUISD::SELECT_MASK:
3006 case SPUISD::FPInterp:
3007 case SPUISD::FPRecipEst:
3008 case SPUISD::SEXT32TO64:
3009 break;
3010 }
3011 }
3013 // LowerAsmOperandForConstraint
3014 void
3015 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
3016 char ConstraintLetter,
3017 std::vector<SDOperand> &Ops,
3018 SelectionDAG &DAG) const {
3019 // Default, for the time being, to the base class handler
3020 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3021 }
3023 /// isLegalAddressImmediate - Return true if the integer value can be used
3024 /// as the offset of the target addressing mode.
3025 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3026 // SPU's addresses are 256K:
3027 return (V > -(1 << 18) && V < (1 << 18) - 1);
3028 }
3030 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {