//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;
// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };
  // Byte offset of each type's preferred slot within a 16-byte register:
  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
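  //! Return the valtype_map entry for VT (a linear scan of the table above).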
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }
  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }
  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // Initialize libcalls:
  setLibcallName(RTLIB::MUL_I64, "__muldi3");

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8, MVT::i8, Custom);
  setTruncStoreAction(MVT::i16, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i64, MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }
  // Custom lower BRCOND for i8 to "promote" the result to i16
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  // SPU has no hardware square root; expand FSQRT to a libcall.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to the appropriate type once ROTR is
  // supported.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Expand);   // libcall

  // SMUL_LOHI, UMUL_LOHI
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does support CTLZ on i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  // Zero extension and sign extension for i64 have to be
  // custom lowered.
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND , VT, Legal);
    setOperationAction(ISD::OR , VT, Legal);
    setOperationAction(ISD::XOR , VT, Legal);
    setOperationAction(ISD::LOAD , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set other properties:
  setSchedulingPreference(SchedulingForLatency);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
      = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (divisible by 16, modulo 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot (modulo 16 != 0)
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store occurs.
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

 Both load and store lowering load a block of data aligned on a 16-byte
 boundary. This is the common aligned load code shared between both.
 */
static SDValue
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
{
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }
  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }
  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
 */
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.getNode()->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDValue Ops[8];
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDValue result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);

    if (result.getNode() == 0)
      return result;

    the_chain = result.getValue(1);

    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
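      // If the chunk was 16-byte aligned, the rotate amount is a known
      // constant; otherwise it has to be computed at run time from the
      // load's (unaligned) base pointer.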
      Ops[0] = the_chain;
      Ops[1] = result;
      if (was16aligned) {
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }
    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT)
        vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
      else
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();
  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;
    SDValue result;

    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }
    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }
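    // Build the byte-insertion control word (the C*D "generate controls for
    // insertion" result) from the element address, move the scalar value into
    // a vector register, and shuffle it into the loaded 16-byte chunk.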
    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, stVecVT, insertEltPtr);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
#if 0 && defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());

  if (VT == MVT::i64) {
    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDValue();
}
//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();

  if (CondVT == MVT::i8) {
    MVT CondNVT = MVT::i16;
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
    return SDValue(); // Unchanged
}
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
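    // (The FORMAL_ARGUMENTS node's last result is the output chain, hence the
    // "- 1" in the loop bound above.)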
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:  ArgRegClass = &SPU::R8CRegClass; break;
      case MVT::i16: ArgRegClass = &SPU::R16CRegClass; break;
      case MVT::i32: ArgRegClass = &SPU::R32CRegClass; break;
      case MVT::i64: ArgRegClass = &SPU::R64CRegClass; break;
      case MVT::f32: ArgRegClass = &SPU::R32FPRegClass; break;
      case MVT::f64: ArgRegClass = &SPU::R64FPRegClass; break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8: ArgRegClass = &SPU::VECREGRegClass; break;
      }
      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }
  // If the function takes variable arguments, spill the remaining argument
  // registers to stack slots so va_arg can find them.
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
                            ArgValues.size());
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.
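  // The 18-bit byte address is encoded as a word (4-byte-granular) offset in
  // the instruction's 16-bit immediate field, hence the shift right by 2.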
  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);
  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}
static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//
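/// getVecImm - If this build_vector's non-undef elements are all the same
/// constant, return that constant; otherwise return null.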
static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;               // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

  return false;
}
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // If the top 16-bits are different than the lower 16-bits, ignoring
          // undefs, we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are different than the lower 8-bits, ignoring
              // undefs, we have an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.
  switch (VT.getSimpleVT()) {
  default:
    // Unhandled vector element type; let the default expansion handle it.
    return SDValue();
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
  }
  case MVT::v8i16: {
    unsigned short Value16;
    if (SplatSize == 2)
      Value16 = (unsigned short) (SplatBits & 0xffff);
    else
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    SDValue Ops[8];
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
  }
  case MVT::v4i32: {
    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
    }

    SmallVector<SDValue, 16> ShufBytes;
    SDValue LO32, HI32;
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
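    // (Reminder: in a shufb control word, bytes 0x80..0xbf produce 0x00,
    // 0xc0..0xdf produce 0xff, and 0xe0..0xff produce 0x80 in the result.)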
    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;
    if (lower_special && upper_special) {
      // Unhappy situation... both upper and lower are special, so punt with
      // a target constant:
      SDValue Zero = DAG.getConstant(0, MVT::i32);
      HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
                                Zero, Zero);
    }
    for (int i = 0; i < 4; ++i) {
      uint64_t val = 0;
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;
        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else
          val |= i * 4 + j + ((i & 1) * 16);
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }

    return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
  }

  return SDValue();
}
1728 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1730 /// permutation vector is monotonically increasing with one "exception"
1731 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733 /// In either case, the net result eventually invokes SHUFB to
1734 /// permute/shuffle the bytes from V1 and V2.
1736 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1737 /// generate the control word for byte/halfword/word insertion. This takes
1738 /// care of a single element move from V2 into V1.
1740 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
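/// Illustrative example (not part of the original comment): for a v4i32
/// shuffle, a mask such as (0, 1, 6, 3) keeps V1's elements 0, 1 and 3 in
/// their natural positions and has a single exception, element 6 (V2's
/// element 2), which is the insertion-mask case described above. A mask like
/// (3, 2, 1, 0) is not monotonic and takes the general SHUFB path instead.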
1741 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1742 SDValue V1 = Op.getOperand(0);
1743 SDValue V2 = Op.getOperand(1);
1744 SDValue PermMask = Op.getOperand(2);
1746 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1748 // If we have a single element being moved from V2 into V1, this can be handled
1749 // using the C*[DX] compute mask instructions, but the vector elements have
1750 // to be monotonically increasing with one exception element.
1751 MVT EltVT = V1.getValueType().getVectorElementType();
1752 unsigned EltsFromV2 = 0;
1754 unsigned V2EltIdx0 = 0;
1755 unsigned CurrElt = 0;
1756 bool monotonic = true;
1757 if (EltVT == MVT::i8)
1759 else if (EltVT == MVT::i16)
1761 else if (EltVT == MVT::i32)
1764 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1766 for (unsigned i = 0, e = PermMask.getNumOperands();
1767 EltsFromV2 <= 1 && monotonic && i != e;
1770 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1773 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1775 if (SrcElt >= V2EltIdx0) {
1777 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1778 } else if (CurrElt != SrcElt) {
1785 if (EltsFromV2 == 1 && monotonic) {
1786 // Compute mask and shuffle
1787 MachineFunction &MF = DAG.getMachineFunction();
1788 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1789 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1790 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1791 // Initialize temporary register to 0
1792 SDValue InitTempReg =
1793 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1794 // Copy register's contents as index in SHUFFLE_MASK:
1795 SDValue ShufMaskOp =
1796 DAG.getNode(SPUISD::SHUFFLE_MASK, V1.getValueType(),
1797 DAG.getTargetConstant(V2Elt, MVT::i32),
1798 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1799 // Use shuffle mask in SHUFB synthetic instruction:
1800 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1802 // Convert the VECTOR_SHUFFLE mask's input element units to the
1804 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1806 SmallVector<SDValue, 16> ResultMask;
1807 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1809 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1812 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1814 for (unsigned j = 0; j < BytesPerElement; ++j) {
1815 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1820 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1821 &ResultMask[0], ResultMask.size());
1822 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1826 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1827 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1829 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1830 // For a constant, build the appropriate constant vector, which will
1831 // eventually simplify to a vector register load.
1833 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1834 SmallVector<SDValue, 16> ConstVecValues;
1838 // Create a constant vector:
1839 switch (Op.getValueType().getSimpleVT()) {
1840 default: assert(0 && "Unexpected constant value type in "
1841 "LowerSCALAR_TO_VECTOR");
1842 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1843 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1844 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1845 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1846 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1847 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1850 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1851 for (size_t j = 0; j < n_copies; ++j)
1852 ConstVecValues.push_back(CValue);
1854 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1855 &ConstVecValues[0], ConstVecValues.size());
1857 // Otherwise, copy the value from one register to another:
1858 switch (Op0.getValueType().getSimpleVT()) {
1859 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1866 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1873 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1874 switch (Op.getValueType().getSimpleVT()) {
1876 cerr << "CellSPU: Unknown vector multiplication, got "
1877 << Op.getValueType().getMVTString()
1883 SDValue rA = Op.getOperand(0);
1884 SDValue rB = Op.getOperand(1);
1885 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1886 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1887 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1888 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1890 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
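// Illustrative scalar sketch of the identity used above (an assumption about
// the MPYU/MPYH node semantics, not code from this file):
//
//   uint32_t mul32(uint32_t a, uint32_t b) {
//     uint32_t lo = (a & 0xffff) * (b & 0xffff);        // MPYU a, b
//     uint32_t h1 = ((a >> 16) * (b & 0xffff)) << 16;   // MPYH a, b
//     uint32_t h2 = ((b >> 16) * (a & 0xffff)) << 16;   // MPYH b, a
//     return lo + h1 + h2;                              // == a * b (mod 2^32)
//   }
//
// The (hi * hi) partial product only affects bits above bit 31, so it can be
// dropped for a truncating 32-bit multiply.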
1894 // Multiply two v8i16 vectors (pipeline-friendly version):
1895 // a) multiply lower halves, mask off the upper 16 bits of each 32-bit product
1896 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1897 // c) use SELB to select the upper and lower halves from the intermediate results
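// Illustrative per-lane view (an assumption about the node semantics, not
// from this file): each 32-bit lane holds two 16-bit elements a1:a0 and
// b1:b0. MPY yields the 32-bit product a0*b0 and MPYHH yields a1*b1; the
// SELECT_MASK/SELB pair below then merges the low 16 bits of each product
// into the correct halfword positions, which is all a truncating i16
// multiply requires.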
1899 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1900 // dual-issue. This code does manage to do this, even if it's a little on
1903 MachineFunction &MF = DAG.getMachineFunction();
1904 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1905 SDValue Chain = Op.getOperand(0);
1906 SDValue rA = Op.getOperand(0);
1907 SDValue rB = Op.getOperand(1);
1908 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1909 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1912 DAG.getCopyToReg(Chain, FSMBIreg,
1913 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1914 DAG.getConstant(0xcccc, MVT::i16)));
1917 DAG.getCopyToReg(FSMBOp, HiProdReg,
1918 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1920 SDValue HHProd_v4i32 =
1921 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1922 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1924 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1925 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1926 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1927 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1929 DAG.getConstant(16, MVT::i16))),
1930 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1933 // This M00sE is N@stI! (apologies to Monty Python)
1935 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1936 // is to break it all apart, sign extend, and reassemble the various
1937 // intermediate products.
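// Illustrative scalar sketch (an assumption, not code from this file): an
// 8-bit truncating multiply only needs the low 8 bits of a 16-bit product,
// and those bits are the same whether the inputs were sign- or zero-extended:
//
//   uint8_t mul8(uint8_t a, uint8_t b) {
//     int16_t wa = (int8_t)a;            // widen each byte to a halfword
//     int16_t wb = (int8_t)b;
//     return (uint8_t)(wa * wb);         // low 8 bits of the 16-bit product
//   }
//
// The code below does the same per byte, using shifts to line each byte pair
// up with the 16-bit MPY lanes and SELB/OR to reassemble the results.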
1939 SDValue rA = Op.getOperand(0);
1940 SDValue rB = Op.getOperand(1);
1941 SDValue c8 = DAG.getConstant(8, MVT::i32);
1942 SDValue c16 = DAG.getConstant(16, MVT::i32);
1945 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1946 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1947 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1949 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1951 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1954 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1955 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1957 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1958 DAG.getConstant(0x2222, MVT::i16));
1960 SDValue LoProdParts =
1961 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1962 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1963 LLProd, LHProd, FSMBmask));
1965 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1968 DAG.getNode(ISD::AND, MVT::v4i32,
1970 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1971 LoProdMask, LoProdMask,
1972 LoProdMask, LoProdMask));
1975 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1976 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1979 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1983 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1985 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1988 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1990 DAG.getNode(SPUISD::VEC_SRA,
1991 MVT::v4i32, rAH, c8)),
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1993 DAG.getNode(SPUISD::VEC_SRA,
1994 MVT::v4i32, rBH, c8)));
1997 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1999 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2003 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2005 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2006 DAG.getNode(ISD::OR, MVT::v4i32,
2014 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2015 MachineFunction &MF = DAG.getMachineFunction();
2016 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2018 SDValue A = Op.getOperand(0);
2019 SDValue B = Op.getOperand(1);
2020 MVT VT = Op.getValueType();
2022 unsigned VRegBR, VRegC;
2024 if (VT == MVT::f32) {
2025 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2026 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2028 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2029 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2031 // TODO: make sure we're feeding FPInterp the right arguments
2032 // Right now: fi B, frest(B)
2035 // (Floating Interpolate (FP Reciprocal Estimate B))
2037 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2038 DAG.getNode(SPUISD::FPInterp, VT, B,
2039 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2041 // Compute A * BRcpl and store it in a temporary register
2043 DAG.getCopyToReg(BRcpl, VRegC,
2044 DAG.getNode(ISD::FMUL, VT, A,
2045 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2046 // What does the Chain variable do? It's magic!
2047 // TODO: set Chain = Op(0).getEntryNode()
2049 return DAG.getNode(ISD::FADD, VT,
2050 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2051 DAG.getNode(ISD::FMUL, VT,
2052 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2053 DAG.getNode(ISD::FSUB, VT, A,
2054 DAG.getNode(ISD::FMUL, VT, B,
2055 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
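// Worked form of the expression above (illustrative, not from the original
// comments): with y = fi(B, frest(B)) ~ 1/B and q0 = A * y, the value
// returned is q0 + y * (A - B * q0), i.e. the initial quotient estimate plus
// a correction proportional to its residual -- one Newton-Raphson style
// refinement step toward A/B.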
2058 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2059 MVT VT = Op.getValueType();
2060 SDValue N = Op.getOperand(0);
2061 SDValue Elt = Op.getOperand(1);
2064 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2065 // Constant argument:
2066 int EltNo = (int) C->getZExtValue();
2069 if (VT == MVT::i8 && EltNo >= 16)
2070 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2071 else if (VT == MVT::i16 && EltNo >= 8)
2072 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2073 else if (VT == MVT::i32 && EltNo >= 4)
2074 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2075 else if (VT == MVT::i64 && EltNo >= 2)
2076 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2078 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2079 // i32 and i64: Element 0 is the preferred slot
2080 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2083 // Need to generate shuffle mask and extract:
2084 int prefslot_begin = -1, prefslot_end = -1;
2085 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2087 switch (VT.getSimpleVT()) {
2089 assert(false && "Invalid value type!");
2091 prefslot_begin = prefslot_end = 3;
2095 prefslot_begin = 2; prefslot_end = 3;
2100 prefslot_begin = 0; prefslot_end = 3;
2105 prefslot_begin = 0; prefslot_end = 7;
2110 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2111 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2113 unsigned int ShufBytes[16];
2114 for (int i = 0; i < 16; ++i) {
2115 // zero fill upper part of preferred slot, don't care about the
2117 unsigned int mask_val;
2118 if (i <= prefslot_end) {
2120 ((i < prefslot_begin)
2122 : elt_byte + (i - prefslot_begin));
2124 ShufBytes[i] = mask_val;
2126 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2129 SDValue ShufMask[4];
2130 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2131 unsigned bidx = i * 4;
2132 unsigned int bits = ((ShufBytes[bidx] << 24) |
2133 (ShufBytes[bidx+1] << 16) |
2134 (ShufBytes[bidx+2] << 8) |
2136 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2139 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2141 sizeof(ShufMask) / sizeof(ShufMask[0]));
2143 retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2144 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2145 N, N, ShufMaskVec));
2147 // Variable index: Rotate the requested element into slot 0, then replicate
2148 // slot 0 across the vector
2149 MVT VecVT = N.getValueType();
2150 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2151 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2155 // Make life easier by making sure the index is zero-extended to i32
2156 if (Elt.getValueType() != MVT::i32)
2157 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2159 // Scale the index to a bit/byte shift quantity
2161 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
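// Illustrative arithmetic (not from the original comments): for v8i16 there
// are 16/8 = 2 bytes per element, so the index is shifted left by
// log2(2) = 1 below to become a byte offset; for v16i8 the factor is 1 and
// no scaling is needed.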
2164 switch (VT.getSimpleVT()) {
2166 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2170 // Don't need to scale, but we do need to correct for where bytes go in
2172 SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
2173 Elt, DAG.getConstant(3, MVT::i32));
2174 SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
2175 DAG.getConstant(16, MVT::i32));
2177 SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
2178 prefSlot, DAG.getConstant(0, MVT::i32),
2179 prefSlot, // trueval
2180 corrected, // falseval
2181 DAG.getCondCode(ISD::SETGT));
2182 vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
2186 // Scale the index to bytes, subtract for preferred slot:
2187 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2188 DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
2189 SDValue prefSlot = DAG.getNode(ISD::SUB, MVT::i32,
2190 Elt, DAG.getConstant(2, MVT::i32));
2191 SDValue corrected = DAG.getNode(ISD::ADD, MVT::i32, prefSlot,
2192 DAG.getConstant(16, MVT::i32));
2194 SDValue shiftAmt = DAG.getNode(ISD::SELECT_CC, MVT::i32,
2195 prefSlot, DAG.getConstant(0, MVT::i32),
2196 prefSlot, // trueval
2197 corrected, // falseval
2198 DAG.getCondCode(ISD::SETGT));
2199 vecShift = DAG.getNode(SPUISD::ROTBYTES_LEFT, VecVT, N, shiftAmt);
2206 // Simple left shift to slot 0
2207 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2208 DAG.getConstant(scaleFactor.logBase2(), MVT::i32));
2209 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2213 // Replicate slot 0 across the entire vector (for consistency with the
2214 // notion of a unified register set)
2217 switch (VT.getSimpleVT()) {
2219 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2223 SDValue factor = DAG.getConstant(0x03030303, MVT::i32);
2224 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2229 SDValue factor = DAG.getConstant(0x02030203, MVT::i32);
2230 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2236 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2237 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2243 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2244 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2245 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2246 loFactor, hiFactor);
2251 retval = DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2252 DAG.getNode(SPUISD::SHUFB, VecVT, vecShift, vecShift, replicate));
2258 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2259 SDValue VecOp = Op.getOperand(0);
2260 SDValue ValOp = Op.getOperand(1);
2261 SDValue IdxOp = Op.getOperand(2);
2262 MVT VT = Op.getValueType();
2264 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2265 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2267 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2268 // Use $2 because it's always 16-byte aligned and it's available:
2269 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2272 DAG.getNode(SPUISD::SHUFB, VT,
2273 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2275 DAG.getNode(SPUISD::SHUFFLE_MASK, VT,
2276 DAG.getNode(ISD::ADD, PtrVT,
2278 DAG.getConstant(CN->getZExtValue(),
2284 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2286 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2288 assert(Op.getValueType() == MVT::i8);
2291 assert(0 && "Unhandled i8 math operator");
2295 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2297 SDValue N1 = Op.getOperand(1);
2298 N0 = (N0.getOpcode() != ISD::Constant
2299 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2300 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2302 N1 = (N1.getOpcode() != ISD::Constant
2303 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2304 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2306 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2307 DAG.getNode(Opc, MVT::i16, N0, N1));
2311 SDValue N1 = Op.getOperand(1);
2313 N0 = (N0.getOpcode() != ISD::Constant
2314 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2315 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2317 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2320 N1 = (N1.getOpcode() != ISD::Constant
2321 ? DAG.getNode(N1Opc, MVT::i32, N1)
2322 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2325 DAG.getNode(ISD::OR, MVT::i16, N0,
2326 DAG.getNode(ISD::SHL, MVT::i16,
2327 N0, DAG.getConstant(8, MVT::i32)));
2328 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2329 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2333 SDValue N1 = Op.getOperand(1);
2335 N0 = (N0.getOpcode() != ISD::Constant
2336 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2337 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2339 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2342 N1 = (N1.getOpcode() != ISD::Constant
2343 ? DAG.getNode(N1Opc, MVT::i16, N1)
2344 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2346 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2347 DAG.getNode(Opc, MVT::i16, N0, N1));
2350 SDValue N1 = Op.getOperand(1);
2352 N0 = (N0.getOpcode() != ISD::Constant
2353 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2354 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2356 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2359 N1 = (N1.getOpcode() != ISD::Constant
2360 ? DAG.getNode(N1Opc, MVT::i16, N1)
2361 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2363 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2364 DAG.getNode(Opc, MVT::i16, N0, N1));
2367 SDValue N1 = Op.getOperand(1);
2369 N0 = (N0.getOpcode() != ISD::Constant
2370 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2371 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2373 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2374 N1 = (N1.getOpcode() != ISD::Constant
2375 ? DAG.getNode(N1Opc, MVT::i16, N1)
2376 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2378 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2379 DAG.getNode(Opc, MVT::i16, N0, N1));
2387 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2389 MVT VT = Op.getValueType();
2390 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2392 SDValue Op0 = Op.getOperand(0);
2395 case ISD::ZERO_EXTEND:
2396 case ISD::SIGN_EXTEND:
2397 case ISD::ANY_EXTEND: {
2398 MVT Op0VT = Op0.getValueType();
2399 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2401 assert(Op0VT == MVT::i32
2402 && "CellSPU: Zero/sign extending something other than i32");
2403 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2405 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2406 ? SPUISD::ROTBYTES_RIGHT_S
2407 : SPUISD::ROTQUAD_RZ_BYTES);
2408 SDValue PromoteScalar =
2409 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2411 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2412 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2413 DAG.getNode(NewOpc, Op0VecVT,
2415 DAG.getConstant(4, MVT::i32))));
2419 // Turn operands into vectors to satisfy type checking (shufb works on
2422 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2424 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2425 SmallVector<SDValue, 16> ShufBytes;
2428 // Create the shuffle mask for "rotating" the carry up one register slot
2429 // once the carry is generated.
2429 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2430 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2431 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2432 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2435 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2436 SDValue ShiftedCarry =
2437 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2439 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2440 &ShufBytes[0], ShufBytes.size()));
2442 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2443 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2444 Op0, Op1, ShiftedCarry));
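// Illustrative scalar view of the technique above (an assumption about the
// node semantics, not code from this file): a 64-bit add built from 32-bit
// words is
//
//   uint32_t lo = a_lo + b_lo;
//   uint32_t carry = (lo < a_lo) ? 1 : 0;   // per-word carry, as CARRY_GENERATE
//   uint32_t hi = a_hi + b_hi + carry;      // carry consumed, as ADD_EXTENDED
//
// The SHUFB above just moves each generated carry up into the slot of the
// word that must consume it.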
2448 // Turn operands into vectors to satisfy type checking (shufb works on
2451 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2453 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2454 SmallVector<SDValue, 16> ShufBytes;
2456 // Create the shuffle mask for "rotating" the borrow up one register slot
2457 // once the borrow is generated.
2458 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2459 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2460 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2461 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2464 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2465 SDValue ShiftedBorrow =
2466 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2467 BorrowGen, BorrowGen,
2468 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2469 &ShufBytes[0], ShufBytes.size()));
2471 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2472 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2473 Op0, Op1, ShiftedBorrow));
2477 SDValue ShiftAmt = Op.getOperand(1);
2478 MVT ShiftAmtVT = ShiftAmt.getValueType();
2479 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2481 DAG.getNode(SPUISD::SELB, VecVT,
2483 DAG.getConstant(0, VecVT),
2484 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2485 DAG.getConstant(0xff00ULL, MVT::i16)));
2486 SDValue ShiftAmtBytes =
2487 DAG.getNode(ISD::SRL, ShiftAmtVT,
2489 DAG.getConstant(3, ShiftAmtVT));
2490 SDValue ShiftAmtBits =
2491 DAG.getNode(ISD::AND, ShiftAmtVT,
2493 DAG.getConstant(7, ShiftAmtVT));
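// Illustrative split (not from the original comments): a shift amount of,
// say, 35 is decomposed as 35 >> 3 = 4 whole bytes and 35 & 7 = 3 residual
// bits, matching the SHLQUAD_L_BYTES / SHLQUAD_L_BITS pair below.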
2495 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2496 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2497 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2498 MaskLower, ShiftAmtBytes),
2503 MVT VT = Op.getValueType();
2504 SDValue ShiftAmt = Op.getOperand(1);
2505 MVT ShiftAmtVT = ShiftAmt.getValueType();
2506 SDValue ShiftAmtBytes =
2507 DAG.getNode(ISD::SRL, ShiftAmtVT,
2509 DAG.getConstant(3, ShiftAmtVT));
2510 SDValue ShiftAmtBits =
2511 DAG.getNode(ISD::AND, ShiftAmtVT,
2513 DAG.getConstant(7, ShiftAmtVT));
2515 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2516 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2517 Op0, ShiftAmtBytes),
2522 // Promote Op0 to vector
2524 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2525 SDValue ShiftAmt = Op.getOperand(1);
2526 MVT ShiftVT = ShiftAmt.getValueType();
2528 // Negate variable shift amounts
2529 if (!isa<ConstantSDNode>(ShiftAmt)) {
2530 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2531 DAG.getConstant(0, ShiftVT), ShiftAmt);
2534 SDValue UpperHalfSign =
2535 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2536 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2537 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2538 Op0, DAG.getConstant(31, MVT::i32))));
2539 SDValue UpperHalfSignMask =
2540 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2541 SDValue UpperLowerMask =
2542 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2543 DAG.getConstant(0xff00, MVT::i16));
2544 SDValue UpperLowerSelect =
2545 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2546 UpperHalfSignMask, Op0, UpperLowerMask);
2547 SDValue RotateLeftBytes =
2548 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2549 UpperLowerSelect, ShiftAmt);
2550 SDValue RotateLeftBits =
2551 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2552 RotateLeftBytes, ShiftAmt);
2554 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2562 //! Lower byte immediate operations for v16i8 vectors:
2564 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2567 MVT VT = Op.getValueType();
2569 ConstVec = Op.getOperand(0);
2570 Arg = Op.getOperand(1);
2571 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2572 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2573 ConstVec = ConstVec.getOperand(0);
2575 ConstVec = Op.getOperand(1);
2576 Arg = Op.getOperand(0);
2577 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2578 ConstVec = ConstVec.getOperand(0);
2583 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2584 uint64_t VectorBits[2];
2585 uint64_t UndefBits[2];
2586 uint64_t SplatBits, SplatUndef;
2589 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2590 && isConstantSplat(VectorBits, UndefBits,
2591 VT.getVectorElementType().getSizeInBits(),
2592 SplatBits, SplatUndef, SplatSize)) {
2594 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2595 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2597 // Turn the BUILD_VECTOR into a set of target constants:
2598 for (size_t i = 0; i < tcVecSize; ++i)
2601 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2602 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2605 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2606 // lowered. Return the operation, rather than a null SDValue.
2610 //! Lower i32 multiplication
2611 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2613 switch (VT.getSimpleVT()) {
2615 cerr << "CellSPU: Unknown LowerMUL value type, got "
2616 << Op.getValueType().getMVTString()
2622 SDValue rA = Op.getOperand(0);
2623 SDValue rB = Op.getOperand(1);
2625 return DAG.getNode(ISD::ADD, MVT::i32,
2626 DAG.getNode(ISD::ADD, MVT::i32,
2627 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2628 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2629 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
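// Illustrative check of the decomposition (hypothetical values): with
// a = 0x00020003 and b = 0x00040005, MPYU gives 3*5 = 15, MPYH(a,b) gives
// (2*5) << 16 and MPYH(b,a) gives (4*3) << 16, summing to 0x0016000F, which
// equals a*b modulo 2^32. See the scalar sketch next to the v4i32 case above.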
2636 //! Custom lowering for CTPOP (count population)
2638 Custom lowering code that counts the number of ones in the input
2639 operand. SPU has such an instruction, but it counts the number of
2640 ones per byte, which then have to be accumulated.
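// Illustrative scalar sketch of the i32 accumulation used below (an
// assumption, not code from this file): with the per-byte counts already
// packed in x,
//
//   x += x >> 16;        // fold the upper two byte counts down
//   x += x >> 8;         // fold the remaining neighbour in
//   return x & 0xff;     // each byte count is at most 8, so no overflow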
2642 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2643 MVT VT = Op.getValueType();
2644 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2646 switch (VT.getSimpleVT()) {
2648 assert(false && "Invalid value type!");
2650 SDValue N = Op.getOperand(0);
2651 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2653 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2654 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2656 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2660 MachineFunction &MF = DAG.getMachineFunction();
2661 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2663 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2665 SDValue N = Op.getOperand(0);
2666 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2667 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2668 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2670 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2671 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2673 // CNTB_result becomes the chain to which the virtual register
2674 // CNTB_reg becomes associated:
2675 SDValue CNTB_result =
2676 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2678 SDValue CNTB_rescopy =
2679 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2681 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2683 return DAG.getNode(ISD::AND, MVT::i16,
2684 DAG.getNode(ISD::ADD, MVT::i16,
2685 DAG.getNode(ISD::SRL, MVT::i16,
2692 MachineFunction &MF = DAG.getMachineFunction();
2693 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2695 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2696 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2698 SDValue N = Op.getOperand(0);
2699 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2700 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2701 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2702 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2704 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2705 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2707 // CNTB_result becomes the chain to which all of the virtual registers
2708 // CNTB_reg, SUM1_reg become associated:
2709 SDValue CNTB_result =
2710 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2712 SDValue CNTB_rescopy =
2713 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2716 DAG.getNode(ISD::SRL, MVT::i32,
2717 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2720 DAG.getNode(ISD::ADD, MVT::i32,
2721 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2723 SDValue Sum1_rescopy =
2724 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2727 DAG.getNode(ISD::SRL, MVT::i32,
2728 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2731 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2732 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2734 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2744 //! Lower ISD::SELECT_CC
2746 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2749 \note Need to revisit this in the future: if the code path through the true
2750 and false value computations is longer than the latency of a branch (6
2751 cycles), then it would be more advantageous to insert a new basic block and
2752 branch on the condition. However, this code does not make that
2753 assumption, given the simplistic uses so far.
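// Rough shape of the lowering below (illustrative, assuming SELB's usual
// bitwise-select semantics): the SETCC result is a full-width mask of all
// ones or all zeros, so SELB(trueval, falseval, mask) picks one of the two
// values bit-for-bit -- no branch is ever emitted on this path.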
2756 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2757 MVT VT = Op.getValueType();
2758 SDValue lhs = Op.getOperand(0);
2759 SDValue rhs = Op.getOperand(1);
2760 SDValue trueval = Op.getOperand(2);
2761 SDValue falseval = Op.getOperand(3);
2762 SDValue condition = Op.getOperand(4);
2764 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2765 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2766 // with another "cannot select select_cc" assert:
2768 SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2769 return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2772 //! Custom (target-specific) lowering entry point
2774 This is where LLVM's DAG selection process calls to do target-specific
2778 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2780 unsigned Opc = (unsigned) Op.getOpcode();
2781 MVT VT = Op.getValueType();
2785 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2786 cerr << "Op.getOpcode() = " << Opc << "\n";
2787 cerr << "*Op.getNode():\n";
2788 Op.getNode()->dump();
2794 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2796 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2797 case ISD::ConstantPool:
2798 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2799 case ISD::GlobalAddress:
2800 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2801 case ISD::JumpTable:
2802 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2804 return LowerConstant(Op, DAG);
2805 case ISD::ConstantFP:
2806 return LowerConstantFP(Op, DAG);
2808 return LowerBRCOND(Op, DAG);
2809 case ISD::FORMAL_ARGUMENTS:
2810 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2812 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2814 return LowerRET(Op, DAG, getTargetMachine());
2817 // i8, i64 math ops:
2818 case ISD::ZERO_EXTEND:
2819 case ISD::SIGN_EXTEND:
2820 case ISD::ANY_EXTEND:
2829 return LowerI8Math(Op, DAG, Opc);
2830 else if (VT == MVT::i64)
2831 return LowerI64Math(Op, DAG, Opc);
2835 // Vector-related lowering.
2836 case ISD::BUILD_VECTOR:
2837 return LowerBUILD_VECTOR(Op, DAG);
2838 case ISD::SCALAR_TO_VECTOR:
2839 return LowerSCALAR_TO_VECTOR(Op, DAG);
2840 case ISD::VECTOR_SHUFFLE:
2841 return LowerVECTOR_SHUFFLE(Op, DAG);
2842 case ISD::EXTRACT_VECTOR_ELT:
2843 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2844 case ISD::INSERT_VECTOR_ELT:
2845 return LowerINSERT_VECTOR_ELT(Op, DAG);
2847 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2851 return LowerByteImmed(Op, DAG);
2853 // Vector and i8 multiply:
2856 return LowerVectorMUL(Op, DAG);
2857 else if (VT == MVT::i8)
2858 return LowerI8Math(Op, DAG, Opc);
2860 return LowerMUL(Op, DAG, VT, Opc);
2863 if (VT == MVT::f32 || VT == MVT::v4f32)
2864 return LowerFDIVf32(Op, DAG);
2866 // This is probably a libcall
2867 else if (Op.getValueType() == MVT::f64)
2868 return LowerFDIVf64(Op, DAG);
2871 assert(0 && "Calling FDIV on unsupported MVT");
2874 return LowerCTPOP(Op, DAG);
2876 case ISD::SELECT_CC:
2877 return LowerSELECT_CC(Op, DAG);
2883 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2886 unsigned Opc = (unsigned) N->getOpcode();
2887 MVT OpVT = N->getValueType(0);
2891 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2892 cerr << "Op.getOpcode() = " << Opc << "\n";
2893 cerr << "*Op.getNode():\n";
2901 /* Otherwise, return unchanged */
2905 //===----------------------------------------------------------------------===//
2906 // Target Optimization Hooks
2907 //===----------------------------------------------------------------------===//
2910 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2913 TargetMachine &TM = getTargetMachine();
2915 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2916 SelectionDAG &DAG = DCI.DAG;
2917 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2918 SDValue Result; // Initially, NULL result
2920 switch (N->getOpcode()) {
2923 SDValue Op1 = N->getOperand(1);
2925 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2926 SDValue Op01 = Op0.getOperand(1);
2927 if (Op01.getOpcode() == ISD::Constant
2928 || Op01.getOpcode() == ISD::TargetConstant) {
2929 // (add <const>, (SPUindirect <arg>, <const>)) ->
2930 // (SPUindirect <arg>, <const + const>)
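// e.g. (add 8, (SPUindirect %ptr, 16)) -> (SPUindirect %ptr, 24)
// (illustrative operand values).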
2931 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2932 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2933 SDValue combinedConst =
2934 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2935 Op0.getValueType());
2937 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2938 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2939 DEBUG(cerr << "With: (SPUindirect <arg>, "
2940 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2941 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2942 Op0.getOperand(0), combinedConst);
2944 } else if (isa<ConstantSDNode>(Op0)
2945 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2946 SDValue Op11 = Op1.getOperand(1);
2947 if (Op11.getOpcode() == ISD::Constant
2948 || Op11.getOpcode() == ISD::TargetConstant) {
2949 // (add (SPUindirect <arg>, <const>), <const>) ->
2950 // (SPUindirect <arg>, <const + const>)
2951 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2952 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2953 SDValue combinedConst =
2954 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2955 Op0.getValueType());
2957 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2958 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2959 DEBUG(cerr << "With: (SPUindirect <arg>, "
2960 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2962 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2963 Op1.getOperand(0), combinedConst);
2968 case ISD::SIGN_EXTEND:
2969 case ISD::ZERO_EXTEND:
2970 case ISD::ANY_EXTEND: {
2971 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2972 N->getValueType(0) == Op0.getValueType()) {
2973 // (any_extend (SPUextract_elt0 <arg>)) ->
2974 // (SPUextract_elt0 <arg>)
2975 // Types must match, however...
2976 DEBUG(cerr << "Replace: ");
2977 DEBUG(N->dump(&DAG));
2978 DEBUG(cerr << "\nWith: ");
2979 DEBUG(Op0.getNode()->dump(&DAG));
2980 DEBUG(cerr << "\n");
2986 case SPUISD::IndirectAddr: {
2987 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2988 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2989 if (CN->getZExtValue() == 0) {
2990 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2991 // (SPUaform <addr>, 0)
2993 DEBUG(cerr << "Replace: ");
2994 DEBUG(N->dump(&DAG));
2995 DEBUG(cerr << "\nWith: ");
2996 DEBUG(Op0.getNode()->dump(&DAG));
2997 DEBUG(cerr << "\n");
3004 case SPUISD::SHLQUAD_L_BITS:
3005 case SPUISD::SHLQUAD_L_BYTES:
3006 case SPUISD::VEC_SHL:
3007 case SPUISD::VEC_SRL:
3008 case SPUISD::VEC_SRA:
3009 case SPUISD::ROTQUAD_RZ_BYTES:
3010 case SPUISD::ROTQUAD_RZ_BITS: {
3011 SDValue Op1 = N->getOperand(1);
3013 if (isa<ConstantSDNode>(Op1)) {
3014 // Kill degenerate vector shifts:
3015 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
3017 if (CN->getZExtValue() == 0) {
3023 case SPUISD::PROMOTE_SCALAR: {
3024 switch (Op0.getOpcode()) {
3027 case ISD::ANY_EXTEND:
3028 case ISD::ZERO_EXTEND:
3029 case ISD::SIGN_EXTEND: {
3030 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
3032 // but only if the SPUpromote_scalar and <arg> types match.
3033 SDValue Op00 = Op0.getOperand(0);
3034 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
3035 SDValue Op000 = Op00.getOperand(0);
3036 if (Op000.getValueType() == N->getValueType(0)) {
3042 case SPUISD::EXTRACT_ELT0: {
3043 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
3045 Result = Op0.getOperand(0);
3052 // Otherwise, return unchanged.
3054 if (Result.getNode()) {
3055 DEBUG(cerr << "\nReplace.SPU: ");
3056 DEBUG(N->dump(&DAG));
3057 DEBUG(cerr << "\nWith: ");
3058 DEBUG(Result.getNode()->dump(&DAG));
3059 DEBUG(cerr << "\n");
3066 //===----------------------------------------------------------------------===//
3067 // Inline Assembly Support
3068 //===----------------------------------------------------------------------===//
3070 /// getConstraintType - Given a constraint letter, return the type of
3071 /// constraint it is for this target.
3072 SPUTargetLowering::ConstraintType
3073 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3074 if (ConstraintLetter.size() == 1) {
3075 switch (ConstraintLetter[0]) {
3082 return C_RegisterClass;
3085 return TargetLowering::getConstraintType(ConstraintLetter);
3088 std::pair<unsigned, const TargetRegisterClass*>
3089 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3092 if (Constraint.size() == 1) {
3093 // GCC register constraint letters
3094 switch (Constraint[0]) {
3098 return std::make_pair(0U, SPU::R64CRegisterClass);
3099 return std::make_pair(0U, SPU::R32CRegisterClass);
3102 return std::make_pair(0U, SPU::R32FPRegisterClass);
3103 else if (VT == MVT::f64)
3104 return std::make_pair(0U, SPU::R64FPRegisterClass);
3107 return std::make_pair(0U, SPU::GPRCRegisterClass);
3111 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3114 //! Compute used/known bits for a SPU operand
3116 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3120 const SelectionDAG &DAG,
3121 unsigned Depth ) const {
3123 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3126 switch (Op.getOpcode()) {
3128 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3138 case SPUISD::PROMOTE_SCALAR: {
3139 SDValue Op0 = Op.getOperand(0);
3140 MVT Op0VT = Op0.getValueType();
3141 unsigned Op0VTBits = Op0VT.getSizeInBits();
3142 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3143 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3144 KnownOne |= APInt(Op0VTBits, InMask, false);
3148 case SPUISD::LDRESULT:
3149 case SPUISD::EXTRACT_ELT0:
3150 case SPUISD::EXTRACT_ELT0_CHAINED: {
3151 MVT OpVT = Op.getValueType();
3152 unsigned OpVTBits = OpVT.getSizeInBits();
3153 uint64_t InMask = OpVT.getIntegerVTBitMask();
3154 KnownZero |= APInt(OpVTBits, ~InMask, false);
3155 KnownOne |= APInt(OpVTBits, InMask, false);
3160 case EXTRACT_I1_ZEXT:
3161 case EXTRACT_I1_SEXT:
3162 case EXTRACT_I8_ZEXT:
3163 case EXTRACT_I8_SEXT:
3168 case SPUISD::SHLQUAD_L_BITS:
3169 case SPUISD::SHLQUAD_L_BYTES:
3170 case SPUISD::VEC_SHL:
3171 case SPUISD::VEC_SRL:
3172 case SPUISD::VEC_SRA:
3173 case SPUISD::VEC_ROTL:
3174 case SPUISD::VEC_ROTR:
3175 case SPUISD::ROTQUAD_RZ_BYTES:
3176 case SPUISD::ROTQUAD_RZ_BITS:
3177 case SPUISD::ROTBYTES_RIGHT_S:
3178 case SPUISD::ROTBYTES_LEFT:
3179 case SPUISD::ROTBYTES_LEFT_CHAINED:
3180 case SPUISD::SELECT_MASK:
3182 case SPUISD::FPInterp:
3183 case SPUISD::FPRecipEst:
3184 case SPUISD::SEXT32TO64:
3189 // LowerAsmOperandForConstraint
3191 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3192 char ConstraintLetter,
3194 std::vector<SDValue> &Ops,
3195 SelectionDAG &DAG) const {
3196 // Default, for the time being, to the base class handler
3197 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3201 /// isLegalAddressImmediate - Return true if the integer value can be used
3202 /// as the offset of the target addressing mode.
3203 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3204 const Type *Ty) const {
3205 // The SPU's local store is 256K, so legal offsets fit in 18 bits:
3206 return (V > -(1 << 18) && V < (1 << 18) - 1);
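// Worked out (illustrative): 1 << 18 is 262144, so the strict inequalities
// above accept offsets from -262143 up to 262142.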
3209 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3214 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3215 // The SPU target isn't yet aware of offsets.