//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
// The LLVM Compiler Infrastructure
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//===----------------------------------------------------------------------===//
// This file implements the SPUTargetLowering class.
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
std::map<unsigned, const char *> node_names;

//! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
  const int prefslot_byte;

const valtype_map_s valtype_map[] = {

const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  const valtype_map_s *retval = 0;

  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;

    cerr << "getValueTypeMapEntry returns NULL for "
//! Predicate that returns true if operand is a memory target
/*!
  \arg Op Operand to test
  \return true if the operand is a memory target (i.e., global
  address, external symbol, constant pool) or an A-form address.
 */
bool isMemoryOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::GlobalAddress
          || Opc == ISD::GlobalTLSAddress
          || Opc == ISD::JumpTable
          || Opc == ISD::ConstantPool
          || Opc == ISD::ExternalSymbol
          || Opc == ISD::TargetGlobalAddress
          || Opc == ISD::TargetGlobalTLSAddress
          || Opc == ISD::TargetJumpTable
          || Opc == ISD::TargetConstantPool
          || Opc == ISD::TargetExternalSymbol
          || Opc == SPUISD::AFormAddr);

//! Predicate that returns true if the operand is an indirect target
bool isIndirectOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::Register
          || Opc == SPUISD::LDRESULT);
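
// Note on the two predicates above: SPUISD::AFormAddr wraps an address that
// can be referenced directly (absolute, A-form style), while plain virtual
// registers and load results (SPUISD::LDRESULT) are treated as "indirect"
// targets, i.e. addresses that are only available through a register. See the
// large-memory paths in the Lower*Address routines further down.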
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // Initialize libcalls:
  setLibcallName(RTLIB::MUL_I64, "__muldi3");
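  // Note: there is no single SPU instruction for a full 64-bit integer
  // multiply, so ISD::MUL on MVT::i64 is expanded below and ends up calling
  // the __muldi3 routine named here.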
  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8, MVT::i8, Custom);
  setTruncStoreAction(MVT::i16, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i64, MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

  // Custom lower BRCOND for i8 to "promote" the result to i16
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root instruction, so expand FSQRT:
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall

  // SMUL_LOHI, UMUL_LOHI
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);
  // SPU does not have BSWAP, but it does have CTLZ support for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  // Zero extension and sign extension for i64 have to be custom lowered.
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
  // Custom lower truncates
  setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND , VT, Legal);
    setOperationAction(ISD::OR , VT, Legal);
    setOperationAction(ISD::XOR , VT, Legal);
    setOperationAction(ISD::LOAD , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set other properties:
  setSchedulingPreference(SchedulingForLatency);
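
  // Note: latency-oriented scheduling is chosen here rather than
  // register-pressure scheduling; the SPU has a large (128 x 128-bit)
  // register file, so hiding instruction latency is the bigger concern.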
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT_CHAINED]
            = "SPUISD::VEC2PREFSLOT_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
            "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
            "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
            "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));
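  // For example, a setcc whose operands are f32 or f64 produces an MVT::i32
  // result, while integer compares keep the width of their operands.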
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (divisible by 16, i.e., modulo 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot (modulo 16 != 0)
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
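
// Worked example (for illustration, not from the original source): for an i32
// access whose constant offset is 0x1234, AlignedLoad leaves
// alignOffs = 0x1234 and prefSlotOffs = (0x1234 & 0xf) - prefslot_byte, then
// fetches the 16-byte chunk at base + (alignOffs & ~0xf) = base + 0x1230.
// The caller later uses prefSlotOffs to rotate the requested word into the
// register's preferred slot.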
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
      prefSlotOffs = -vtm->prefslot_byte;
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
    prefSlotOffs = -vtm->prefslot_byte;

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));

    // Emit the vector load:
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));

  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);

/// Custom lower loads for CellSPU
/*!
  All CellSPU loads and stores are aligned to 16-byte boundaries, so for
  elements within a 16-byte block, we have to rotate to extract the requested
  element.
 */
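
// For example (illustrative): an i16 load whose address does not already sit
// at the preferred halfword slot becomes a 16-byte chunk load, followed by a
// ROTBYTES_LEFT_CHAINED that rotates the requested halfword into the
// preferred slot, followed by a VEC2PREFSLOT_CHAINED extraction of the
// scalar.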
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.getNode()->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);

    if (result.getNode() == 0)

    the_chain = result.getValue(1);

    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

        Ops[2] = DAG.getConstant(rotamt, MVT::i16);

        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
        vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::VEC2PREFSLOT_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);

      // Handle the sign and zero-extending loads for i1 and i8:
      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);

      result = DAG.getNode(NewOpC, OpVT, result);

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDValue retops[2] = {

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));

  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";

/// Custom lower stores for CellSPU
/*!
  All CellSPU stores are aligned to 16-byte boundaries, so for elements
  within a 16-byte block, we have to generate a shuffle to insert the
  requested element into its place, then store the resulting block.
 */
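
// In outline, the code below lowers a scalar store as:
//   (1) an AlignedLoad of the containing 16-byte chunk,
//   (2) a SHUFFLE_MASK that builds an insertion control word for the slot,
//   (3) a SHUFB that splices the scalar into the loaded chunk, and
//   (4) an ordinary store of the merged 16-byte chunk.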
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;

    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();

        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      theValue = theValue.getOperand(0);

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      cerr << "------- CellSPU:LowerStore result:\n";

      DAG.setRoot(currentRoot);

  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
    cerr << (unsigned) SN->getAddressingMode() << "\n";
/// Generate the address of a constant pool entry.
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

         "LowerConstantPool: Relocation model other than static"

LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

         "LowerJumpTable: Relocation model other than static not supported.");

LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

  cerr << "LowerGlobalAddress: Relocation model other than static not "

//! Custom lower i64 integer constants
/*!
  This code inserts all of the necessary juggling that needs to occur to load
  a 64-bit constant into a register.
 */
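
// Concretely, LowerConstant below splats the i64 immediate into a v2i64
// BUILD_VECTOR and then reads the scalar back out of the vector's preferred
// slot with SPUISD::VEC2PREFSLOT.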
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::i64) {
    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
    cerr << "LowerConstant: unhandled constant type "

//! Custom lower double precision floating point constants
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));

//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();

  if (CondVT == MVT::i8) {
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),

  return SDValue();   // Unchanged
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
        ArgRegClass = &SPU::R8CRegClass;
        ArgRegClass = &SPU::R16CRegClass;
        ArgRegClass = &SPU::R32CRegClass;
        ArgRegClass = &SPU::R64CRegClass;
        ArgRegClass = &SPU::R32FPRegClass;
        ArgRegClass = &SPU::R64FPRegClass;
        ArgRegClass = &SPU::VECREGRegClass;

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);

      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;

    ArgValues.push_back(ArgVal);

    Root = ArgVal.getOperand(0);

  // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
  // We will spill (79-3)+1 registers to the stack
  SmallVector<SDValue, 79-3+1> MemOps;

  // Create the frame slot

  for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
    VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
    SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
    Root = Store.getOperand(0);
    MemOps.push_back(Store);

    // Increment address by stack slot size for the next stored argument
    ArgOffset += StackSlotSize;

  if (!MemOps.empty())
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();

LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();
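
  // Argument-passing scheme used below: the first NumArgRegs quadword
  // registers (SPURegisterInfo::getArgRegs()) carry arguments; anything that
  // does not fit is written to 16-byte stack slots at offsets starting at
  // SPUFrameInfo::minStackSize().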
  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
    InFlag = Chain.getValue(1);

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.

      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      // "Large memory" mode: Turn all calls into indirect calls with an
      // X-form address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);

  // If the function returns void, just return the chain.
  if (NumResults == 0)

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());

  SDValue Chain = Op.getOperand(0);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);

    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);

  return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//
static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

  return 0; // All UNDEF: use implicit def.; not Constant node

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      Value = Value >> 32;
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      Value = Value >> 32;
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      Value = Value >> 32;
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;               // In the upper half of the vector?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
      // Nonconstant element.

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            uint64_t &SplatBits, uint64_t &SplatUndef,
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32 bits are the same as the lower 32 bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the top 16 bits match the lower 16 bits (ignoring undefs), keep
          // narrowing; otherwise this is an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8 bits match the lower 8 bits (ignoring undefs),
              // this is an i8 splat; otherwise it is an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // We have an 8-bit splat.
                SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
            SplatUndef = Undef16;
          SplatUndef = Undef32;
    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];

  return false;  // Can't be a splat if two pieces don't match.
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;

  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));

    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));

    unsigned short Value16;
      Value16 = (unsigned short) (SplatBits & 0xffff);
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);

    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);

    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);

    SmallVector<SDValue, 16> ShufBytes;

    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
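
    // The three "special" 32-bit values (0, 0xffffffff, 0x80000000) correspond
    // to the byte patterns (0x00, 0xff, 0x80) that a SHUFB control word can
    // produce by itself, so a half that matches one of them needs no real
    // source vector; see the mask-building loop below.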
    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")

    if (lower_special && upper_special) {
      // Unhappy situation... both upper and lower are special, so punt with
      // a target constant:
      SDValue Zero = DAG.getConstant(0, MVT::i32);
      HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,

    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;

        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
          val |= i * 4 + j + ((i & 1) * 16);

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));

    return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
1740 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1741 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1742 /// permutation vector, V3, is monotonically increasing with one "exception"
1743 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1744 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1745 /// In either case, the net result is going to eventually invoke SHUFB to
1746 /// permute/shuffle the bytes from V1 and V2.
1748 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1749 /// generate a control word for byte/halfword/word insertion. This takes care of
1750 /// a single element move from V2 into V1.
1752 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1753 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1754 SDValue V1 = Op.getOperand(0);
1755 SDValue V2 = Op.getOperand(1);
1756 SDValue PermMask = Op.getOperand(2);
1758 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1760 // If we have a single element being moved from V1 to V2, this can be handled
1761 // using the C*[DX] compute mask instructions, but the vector elements have
1762 // to be monotonically increasing with one exception element.
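  // For example, for v4i32 the first mask index that refers to V2 is 4, so a
  // permutation mask such as <0, 1, 6, 3> keeps V1's elements in increasing order
  // and pulls exactly one word from V2; a mask like that can be handled with a
  // compute-mask + shufb pair instead of a constant-pool shuffle mask.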
1763 MVT EltVT = V1.getValueType().getVectorElementType();
1764 unsigned EltsFromV2 = 0;
1766 unsigned V2EltIdx0 = 0;
1767 unsigned CurrElt = 0;
1768 bool monotonic = true;
1769 if (EltVT == MVT::i8)
1771 else if (EltVT == MVT::i16)
1773 else if (EltVT == MVT::i32)
1776 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1778 for (unsigned i = 0, e = PermMask.getNumOperands();
1779 EltsFromV2 <= 1 && monotonic && i != e;
1782 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1785 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1787 if (SrcElt >= V2EltIdx0) {
1789 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1790 } else if (CurrElt != SrcElt) {
1797 if (EltsFromV2 == 1 && monotonic) {
1798 // Compute mask and shuffle
1799 MachineFunction &MF = DAG.getMachineFunction();
1800 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1801 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1802 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1803 // Initialize temporary register to 0
1804 SDValue InitTempReg =
1805 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1806 // Copy register's contents as index in SHUFFLE_MASK:
1807 SDValue ShufMaskOp =
1808 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
1809 DAG.getTargetConstant(V2Elt, MVT::i32),
1810 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1811 // Use shuffle mask in SHUFB synthetic instruction:
1812 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1814   // Convert the SHUFFLE_VECTOR mask's input element units to actual byte indices.
1816 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1818 SmallVector<SDValue, 16> ResultMask;
1819 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1821 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1824 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1826 for (unsigned j = 0; j < BytesPerElement; ++j) {
1827 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1832 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1833 &ResultMask[0], ResultMask.size());
1834 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1838 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1839 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1841 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1842 // For a constant, build the appropriate constant vector, which will
1843 // eventually simplify to a vector register load.
1845 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1846 SmallVector<SDValue, 16> ConstVecValues;
1850 // Create a constant vector:
1851 switch (Op.getValueType().getSimpleVT()) {
1852 default: assert(0 && "Unexpected constant value type in "
1853 "LowerSCALAR_TO_VECTOR");
1854 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1855 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1856 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1857 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1858 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1859 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1862 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1863 for (size_t j = 0; j < n_copies; ++j)
1864 ConstVecValues.push_back(CValue);
1866 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1867 &ConstVecValues[0], ConstVecValues.size());
1869 // Otherwise, copy the value from one register to another:
1870 switch (Op0.getValueType().getSimpleVT()) {
1871 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1878 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1885 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1886 switch (Op.getValueType().getSimpleVT()) {
1888 cerr << "CellSPU: Unknown vector multiplication, got "
1889 << Op.getValueType().getMVTString()
1895 SDValue rA = Op.getOperand(0);
1896 SDValue rB = Op.getOperand(1);
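    // The decomposition below is the usual 16x16 split of a 32-bit multiply;
    // roughly, in scalar terms:
    //
    //   uint32_t lo  = (a & 0xffff) * (b & 0xffff);       // mpyu a, b
    //   uint32_t hi1 = ((a >> 16) * (b & 0xffff)) << 16;  // mpyh a, b
    //   uint32_t hi2 = ((b >> 16) * (a & 0xffff)) << 16;  // mpyh b, a
    //   // lo + hi1 + hi2 == a * b (mod 2^32)
    //
    // applied lane-wise to each 32-bit element.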
1897 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1898 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1899 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1900 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1902 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1906 // Multiply two v8i16 vectors (pipeline friendly version):
1907 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1908 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1909 // c) Use SELB to select upper and lower halves from the intermediate results
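  // Roughly: each 32-bit word holds two 16-bit lanes. MPY multiplies the low
  // lanes (the low 16 bits of each product are the lane results), MPYHH
  // multiplies the high lanes, and shifting that product left by 16 moves its
  // low 16 bits into the high-lane position. The SELECT_MASK built from the
  // immediate 0xcccc expands to a per-word byte mask of ff,ff,00,00, so SELB
  // picks the high halfword of each word from the shifted MPYHH result and the
  // low halfword from the MPY result.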
1911 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1912 // dual-issue. This code does manage to do this, even if it is a little convoluted.
1915 MachineFunction &MF = DAG.getMachineFunction();
1916 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1917 SDValue Chain = Op.getOperand(0);
1918 SDValue rA = Op.getOperand(0);
1919 SDValue rB = Op.getOperand(1);
1920 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1921 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1924 DAG.getCopyToReg(Chain, FSMBIreg,
1925 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1926 DAG.getConstant(0xcccc, MVT::i16)));
1929 DAG.getCopyToReg(FSMBOp, HiProdReg,
1930 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1932 SDValue HHProd_v4i32 =
1933 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1934 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1936 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1937 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1938 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1939 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1941 DAG.getConstant(16, MVT::i16))),
1942 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1945 // This M00sE is N@stI! (apologies to Monty Python)
1947 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1948 // is to break it all apart, sign extend, and reassemble the various
1949 // intermediate products.
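    // At a high level: the byte operands are widened so that each product can be
    // formed exactly by the 16-bit MPY, the unwanted high bits are stripped with
    // shifts and masks, and SELB merges the even and odd byte lanes back into a
    // single v16i8 result. Only the low byte of each 16-bit product is kept, which
    // is exactly the i8 (mod 256) multiplication result.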
1951 SDValue rA = Op.getOperand(0);
1952 SDValue rB = Op.getOperand(1);
1953 SDValue c8 = DAG.getConstant(8, MVT::i32);
1954 SDValue c16 = DAG.getConstant(16, MVT::i32);
1957 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1958 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1959 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1961 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1963 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1966 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1967 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1969 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1970 DAG.getConstant(0x2222, MVT::i16));
1972 SDValue LoProdParts =
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1974 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1975 LLProd, LHProd, FSMBmask));
1977 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1980 DAG.getNode(ISD::AND, MVT::v4i32,
1982 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1983 LoProdMask, LoProdMask,
1984 LoProdMask, LoProdMask));
1987 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1995 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1996 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1997 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2000 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2001 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2002 DAG.getNode(SPUISD::VEC_SRA,
2003 MVT::v4i32, rAH, c8)),
2004 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2005 DAG.getNode(SPUISD::VEC_SRA,
2006 MVT::v4i32, rBH, c8)));
2009 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2011 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2015 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2017 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2018 DAG.getNode(ISD::OR, MVT::v4i32,
2026 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2027 MachineFunction &MF = DAG.getMachineFunction();
2028 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2030 SDValue A = Op.getOperand(0);
2031 SDValue B = Op.getOperand(1);
2032 MVT VT = Op.getValueType();
2034 unsigned VRegBR, VRegC;
2036 if (VT == MVT::f32) {
2037 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2038 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2040 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2041 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2043 // TODO: make sure we're feeding FPInterp the right arguments
2044 // Right now: fi B, frest(B)
2047 // (Floating Interpolate (FP Reciprocal Estimate B))
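  // The sequence below is a reciprocal-estimate division with one refinement
  // step. With y ~= 1/B from frest/fi and q0 = A * y, the value returned is
  //
  //   q1 = q0 + y * (A - B * q0)
  //
  // i.e. the initial quotient plus the residual scaled by the reciprocal
  // estimate, which roughly doubles the number of accurate bits.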
2049 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2050 DAG.getNode(SPUISD::FPInterp, VT, B,
2051 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2053 // Computes A * BRcpl and stores in a temporary register
2055 DAG.getCopyToReg(BRcpl, VRegC,
2056 DAG.getNode(ISD::FMUL, VT, A,
2057 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2058   // What does the Chain variable do? It's magic!
2059 // TODO: set Chain = Op(0).getEntryNode()
2061 return DAG.getNode(ISD::FADD, VT,
2062 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2063 DAG.getNode(ISD::FMUL, VT,
2064 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2065 DAG.getNode(ISD::FSUB, VT, A,
2066 DAG.getNode(ISD::FMUL, VT, B,
2067 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2070 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2071 MVT VT = Op.getValueType();
2072 SDValue N = Op.getOperand(0);
2073 SDValue Elt = Op.getOperand(1);
2076 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2077 // Constant argument:
2078 int EltNo = (int) C->getZExtValue();
2081 if (VT == MVT::i8 && EltNo >= 16)
2082 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2083 else if (VT == MVT::i16 && EltNo >= 8)
2084 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2085 else if (VT == MVT::i32 && EltNo >= 4)
2086       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2087 else if (VT == MVT::i64 && EltNo >= 2)
2088       assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2090 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2091 // i32 and i64: Element 0 is the preferred slot
2092 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2095 // Need to generate shuffle mask and extract:
2096 int prefslot_begin = -1, prefslot_end = -1;
2097 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2099 switch (VT.getSimpleVT()) {
2101 assert(false && "Invalid value type!");
2103 prefslot_begin = prefslot_end = 3;
2107 prefslot_begin = 2; prefslot_end = 3;
2112 prefslot_begin = 0; prefslot_end = 3;
2117 prefslot_begin = 0; prefslot_end = 7;
2122 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2123 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2125 unsigned int ShufBytes[16];
2126 for (int i = 0; i < 16; ++i) {
2127       // zero fill upper part of preferred slot, don't care about the rest
2129 unsigned int mask_val;
2130 if (i <= prefslot_end) {
2132 ((i < prefslot_begin)
2134 : elt_byte + (i - prefslot_begin));
2136 ShufBytes[i] = mask_val;
2138 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2141 SDValue ShufMask[4];
2142 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2143       unsigned bidx = i * 4;
2144 unsigned int bits = ((ShufBytes[bidx] << 24) |
2145 (ShufBytes[bidx+1] << 16) |
2146 (ShufBytes[bidx+2] << 8) |
2148 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2151 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2153 sizeof(ShufMask) / sizeof(ShufMask[0]));
2155 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2156 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2157 N, N, ShufMaskVec));
2159 // Variable index: Rotate the requested element into slot 0, then replicate
2160 // slot 0 across the vector
2161 MVT VecVT = N.getValueType();
2162 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2163 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2167 // Make life easier by making sure the index is zero-extended to i32
2168 if (Elt.getValueType() != MVT::i32)
2169 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2171 // Scale the index to a bit/byte shift quantity
2173 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2174 unsigned scaleShift = scaleFactor.logBase2();
2177 if (scaleShift > 0) {
2178 // Scale the shift factor:
2179 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2180 DAG.getConstant(scaleShift, MVT::i32));
2183 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2185 // Replicate the bytes starting at byte 0 across the entire vector (for
2186 // consistency with the notion of a unified register set)
2189 switch (VT.getSimpleVT()) {
2191       cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
2195 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2196 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2201 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2202 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2208 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2209 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2215 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2216 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2217 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2218 loFactor, hiFactor);
2223 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2224 DAG.getNode(SPUISD::SHUFB, VecVT,
2225 vecShift, vecShift, replicate));
2231 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2232 SDValue VecOp = Op.getOperand(0);
2233 SDValue ValOp = Op.getOperand(1);
2234 SDValue IdxOp = Op.getOperand(2);
2235 MVT VT = Op.getValueType();
2237 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2238 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2240 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2241 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2242 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2243 DAG.getRegister(SPU::R1, PtrVT),
2244 DAG.getConstant(CN->getSExtValue(), PtrVT));
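  // SHUFFLE_MASK here is selected to one of the C*D-form instructions, which
  // compute an insertion control word from the low-order bits of an address
  // rather than loading anything from memory. $sp is used only because its value
  // is known to be 16-byte aligned, so the low bits of ($sp + index) are
  // determined entirely by the index.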
2245 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
2248 DAG.getNode(SPUISD::SHUFB, VT,
2249 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2251 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
2256 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2258 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2260 assert(Op.getValueType() == MVT::i8);
2263 assert(0 && "Unhandled i8 math operator");
2267     // 8-bit subtraction: Promote the arguments up to 16 bits and truncate the result
2269 SDValue N1 = Op.getOperand(1);
2270 N0 = (N0.getOpcode() != ISD::Constant
2271 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2272 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2274 N1 = (N1.getOpcode() != ISD::Constant
2275 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2276 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2278 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2279 DAG.getNode(Opc, MVT::i16, N0, N1));
2283 SDValue N1 = Op.getOperand(1);
2285 N0 = (N0.getOpcode() != ISD::Constant
2286 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2287 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2289 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2292 N1 = (N1.getOpcode() != ISD::Constant
2293 ? DAG.getNode(N1Opc, MVT::i32, N1)
2294 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2297 DAG.getNode(ISD::OR, MVT::i16, N0,
2298 DAG.getNode(ISD::SHL, MVT::i16,
2299 N0, DAG.getConstant(8, MVT::i32)));
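      // Duplicating the byte into both halves of the i16 makes the 16-bit rotate
      // behave like an 8-bit rotate: after rotating by r, the low 8 bits are
      // ((b << r) | (b >> (8 - r))) & 0xff, which the final TRUNCATE extracts.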
2300 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2301 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2305 SDValue N1 = Op.getOperand(1);
2307 N0 = (N0.getOpcode() != ISD::Constant
2308 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2309 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2311 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2314 N1 = (N1.getOpcode() != ISD::Constant
2315 ? DAG.getNode(N1Opc, MVT::i16, N1)
2316 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2318 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2319 DAG.getNode(Opc, MVT::i16, N0, N1));
2322 SDValue N1 = Op.getOperand(1);
2324 N0 = (N0.getOpcode() != ISD::Constant
2325 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2326 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2328 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2331 N1 = (N1.getOpcode() != ISD::Constant
2332 ? DAG.getNode(N1Opc, MVT::i16, N1)
2333 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2335 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2336 DAG.getNode(Opc, MVT::i16, N0, N1));
2339 SDValue N1 = Op.getOperand(1);
2341 N0 = (N0.getOpcode() != ISD::Constant
2342 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2343 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2345 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2346 N1 = (N1.getOpcode() != ISD::Constant
2347 ? DAG.getNode(N1Opc, MVT::i16, N1)
2348 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2350 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2351 DAG.getNode(Opc, MVT::i16, N0, N1));
2359 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2361 MVT VT = Op.getValueType();
2362 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2364 SDValue Op0 = Op.getOperand(0);
2367 case ISD::ZERO_EXTEND:
2368 case ISD::SIGN_EXTEND:
2369 case ISD::ANY_EXTEND: {
2370 MVT Op0VT = Op0.getValueType();
2371 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2373 assert(Op0VT == MVT::i32
2374 && "CellSPU: Zero/sign extending something other than i32");
2376 DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
2378 SDValue PromoteScalar =
2379 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2381 if (Opc != ISD::SIGN_EXTEND) {
2382 // Use a shuffle to zero extend the i32 to i64 directly:
2384 DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
2385 DAG.getConstant(0x80808080, MVT::i32),
2386 DAG.getConstant(0x00010203, MVT::i32),
2387 DAG.getConstant(0x80808080, MVT::i32),
2388 DAG.getConstant(0x08090a0b, MVT::i32));
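      // In shufb selector terms, bytes with the high bit set (0x80) produce zero
      // bytes, and 0x00010203 copies bytes 0..3 (the promoted i32 sits in the
      // preferred slot). The first doubleword of the shuffle result is therefore
      // { 0x00000000, Op0 }, i.e. the zero-extended i64, ready to be read back out
      // with VEC2PREFSLOT; the second doubleword is a don't-care.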
2389 SDValue zextShuffle =
2390 DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2391 PromoteScalar, PromoteScalar, shufMask);
2393 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2394 DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
2396 // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
2397 // right and propagate the sign bit) instruction.
2399 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
2400 PromoteScalar, DAG.getConstant(4, MVT::i32));
2402 DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
2403 PromoteScalar, DAG.getConstant(32, MVT::i32));
2405 DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
2406 DAG.getConstant(0xf0f0, MVT::i16));
2407 SDValue CombineQuad =
2408 DAG.getNode(SPUISD::SELB, Op0VecVT,
2409 SignQuad, RotQuad, SelMask);
2411 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2412 DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
2417     // Turn operands into vectors to satisfy type checking (shufb works on vectors)
2420 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2422 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2423 SmallVector<SDValue, 16> ShufBytes;
2425     // Create the shuffle mask for "rotating" the carry up one register slot
2426     // once the carry is generated.
2427 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2428 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2429 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2430 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2433 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2434 SDValue ShiftedCarry =
2435 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2437 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2438 &ShufBytes[0], ShufBytes.size()));
2440 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2441 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2442 Op0, Op1, ShiftedCarry));
2446     // Turn operands into vectors to satisfy type checking (shufb works on vectors)
2449 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2451 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2452 SmallVector<SDValue, 16> ShufBytes;
2454 // Create the shuffle mask for "rotating" the borrow up one register slot
2455 // once the borrow is generated.
2456 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2457 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2458 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2459 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2462 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2463 SDValue ShiftedBorrow =
2464 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2465 BorrowGen, BorrowGen,
2466 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2467 &ShufBytes[0], ShufBytes.size()));
2469 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2470 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2471 Op0, Op1, ShiftedBorrow));
2475 SDValue ShiftAmt = Op.getOperand(1);
2476 MVT ShiftAmtVT = ShiftAmt.getValueType();
2477 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2479 DAG.getNode(SPUISD::SELB, VecVT,
2481 DAG.getConstant(0, VecVT),
2482 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2483 DAG.getConstant(0xff00ULL, MVT::i16)));
2484 SDValue ShiftAmtBytes =
2485 DAG.getNode(ISD::SRL, ShiftAmtVT,
2487 DAG.getConstant(3, ShiftAmtVT));
2488 SDValue ShiftAmtBits =
2489 DAG.getNode(ISD::AND, ShiftAmtVT,
2491 DAG.getConstant(7, ShiftAmtVT));
2493 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2494 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2495 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2496 MaskLower, ShiftAmtBytes),
2501 MVT VT = Op.getValueType();
2502 SDValue ShiftAmt = Op.getOperand(1);
2503 MVT ShiftAmtVT = ShiftAmt.getValueType();
2504 SDValue ShiftAmtBytes =
2505 DAG.getNode(ISD::SRL, ShiftAmtVT,
2507 DAG.getConstant(3, ShiftAmtVT));
2508 SDValue ShiftAmtBits =
2509 DAG.getNode(ISD::AND, ShiftAmtVT,
2511 DAG.getConstant(7, ShiftAmtVT));
2513 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2514 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2515 Op0, ShiftAmtBytes),
2520 // Promote Op0 to vector
2522 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2523 SDValue ShiftAmt = Op.getOperand(1);
2524 MVT ShiftVT = ShiftAmt.getValueType();
2526 // Negate variable shift amounts
2527 if (!isa<ConstantSDNode>(ShiftAmt)) {
2528 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2529 DAG.getConstant(0, ShiftVT), ShiftAmt);
2532 SDValue UpperHalfSign =
2533 DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
2534 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2535 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2536 Op0, DAG.getConstant(31, MVT::i32))));
2537 SDValue UpperHalfSignMask =
2538 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2539 SDValue UpperLowerMask =
2540 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2541 DAG.getConstant(0xff00, MVT::i16));
2542 SDValue UpperLowerSelect =
2543 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2544 UpperHalfSignMask, Op0, UpperLowerMask);
2545 SDValue RotateLeftBytes =
2546 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2547 UpperLowerSelect, ShiftAmt);
2548 SDValue RotateLeftBits =
2549 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2550 RotateLeftBytes, ShiftAmt);
2552 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
2560 //! Lower byte immediate operations for v16i8 vectors:
2562 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2565 MVT VT = Op.getValueType();
2567 ConstVec = Op.getOperand(0);
2568 Arg = Op.getOperand(1);
2569 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2570 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2571 ConstVec = ConstVec.getOperand(0);
2573 ConstVec = Op.getOperand(1);
2574 Arg = Op.getOperand(0);
2575 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2576 ConstVec = ConstVec.getOperand(0);
2581 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2582 uint64_t VectorBits[2];
2583 uint64_t UndefBits[2];
2584 uint64_t SplatBits, SplatUndef;
2587 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2588 && isConstantSplat(VectorBits, UndefBits,
2589 VT.getVectorElementType().getSizeInBits(),
2590 SplatBits, SplatUndef, SplatSize)) {
2592 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2593 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2595 // Turn the BUILD_VECTOR into a set of target constants:
2596 for (size_t i = 0; i < tcVecSize; ++i)
2599 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2600 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2603   // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2604   // lowered. Return the operation, rather than a null SDValue.
2608 //! Lower i32 multiplication
2609 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2611 switch (VT.getSimpleVT()) {
2613 cerr << "CellSPU: Unknown LowerMUL value type, got "
2614 << Op.getValueType().getMVTString()
2620 SDValue rA = Op.getOperand(0);
2621 SDValue rB = Op.getOperand(1);
2623 return DAG.getNode(ISD::ADD, MVT::i32,
2624 DAG.getNode(ISD::ADD, MVT::i32,
2625 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2626 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2627 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2634 //! Custom lowering for CTPOP (count population)
2636   Custom lowering code that counts the number of ones in the input
2637 operand. SPU has such an instruction, but it counts the number of
2638 ones per byte, which then have to be accumulated.
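  For the i32 case below, the accumulation amounts to (with per-byte counts
  a, b, c, d packed as x = a:b:c:d):

    s1 = x + (x >> 16)      // low halfword now holds (a+c):(b+d)
    s2 = s1 + (s1 >> 8)     // low byte now holds a+b+c+d
    result = s2 & 0xff

  The per-byte counts never exceed 8, so no lane overflows into its neighbor.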
2640 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2641 MVT VT = Op.getValueType();
2642 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2644 switch (VT.getSimpleVT()) {
2646 assert(false && "Invalid value type!");
2648 SDValue N = Op.getOperand(0);
2649 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2651 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2652 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2654 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2658 MachineFunction &MF = DAG.getMachineFunction();
2659 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2661 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2663 SDValue N = Op.getOperand(0);
2664 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2665 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2666 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2668 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2669 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2671     // CNTB_result becomes the chain to which the virtual register
2672     // CNTB_reg becomes associated:
2673 SDValue CNTB_result =
2674 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2676 SDValue CNTB_rescopy =
2677 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2679 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2681 return DAG.getNode(ISD::AND, MVT::i16,
2682 DAG.getNode(ISD::ADD, MVT::i16,
2683 DAG.getNode(ISD::SRL, MVT::i16,
2690 MachineFunction &MF = DAG.getMachineFunction();
2691 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2693 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2694 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2696 SDValue N = Op.getOperand(0);
2697 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2698 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2699 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2700 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2702 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2703 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2705 // CNTB_result becomes the chain to which all of the virtual registers
2706 // CNTB_reg, SUM1_reg become associated:
2707 SDValue CNTB_result =
2708 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2710 SDValue CNTB_rescopy =
2711 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2714 DAG.getNode(ISD::SRL, MVT::i32,
2715 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2718 DAG.getNode(ISD::ADD, MVT::i32,
2719 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2721 SDValue Sum1_rescopy =
2722 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2725 DAG.getNode(ISD::SRL, MVT::i32,
2726 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2729 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2730 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2732 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2742 //! Lower ISD::SELECT_CC
2744   ISD::SELECT_CC can (generally) be implemented directly on the SPU using the SELB instruction.
2747 \note Need to revisit this in the future: if the code path through the true
2748 and false value computations is longer than the latency of a branch (6
2749 cycles), then it would be more advantageous to branch and insert a new basic
2750 block and branch on the condition. However, this code does not make that
2751   assumption, given the simplistic uses so far.
2754 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
2755 MVT VT = Op.getValueType();
2756 SDValue lhs = Op.getOperand(0);
2757 SDValue rhs = Op.getOperand(1);
2758 SDValue trueval = Op.getOperand(2);
2759 SDValue falseval = Op.getOperand(3);
2760 SDValue condition = Op.getOperand(4);
2762 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2763 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2764 // with another "cannot select select_cc" assert:
2766 SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
2767 return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
2770 //! Custom lower ISD::TRUNCATE
2771 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2773 MVT VT = Op.getValueType();
2774 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2775 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2777 SDValue Op0 = Op.getOperand(0);
2778 MVT Op0VT = Op0.getValueType();
2779 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2781 SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
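  // The truncation is performed entirely with a byte shuffle: the selector built
  // below copies only the least significant bytes of the wider source value
  // (higher byte indices, since the lane layout is big-endian) into the preferred
  // slot of the destination type, and VEC2PREFSLOT then reads the truncated
  // scalar back out.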
2786 // Create shuffle mask
2787 switch (Op0VT.getSimpleVT()) {
2791 // least significant doubleword of quadword
2792 maskHigh = 0x08090a0b;
2793 maskLow = 0x0c0d0e0f;
2796 // least significant word of quadword
2797 maskHigh = maskLow = 0x0c0d0e0f;
2800 // least significant halfword of quadword
2801 maskHigh = maskLow = 0x0e0f0e0f;
2804 // least significant byte of quadword
2805 maskHigh = maskLow = 0x0f0f0f0f;
2808 cerr << "Truncation to illegal type!";
2815 // least significant word of doubleword
2816 maskHigh = maskLow = 0x04050607;
2819 // least significant halfword of doubleword
2820 maskHigh = maskLow = 0x06070607;
2823 // least significant byte of doubleword
2824 maskHigh = maskLow = 0x07070707;
2827 cerr << "Truncation to illegal type!";
2835 // least significant halfword of word
2836 maskHigh = maskLow = 0x02030203;
2839 // least significant byte of word/halfword
2840 maskHigh = maskLow = 0x03030303;
2843 cerr << "Truncation to illegal type!";
2848 cerr << "Trying to lower truncation from illegal type!";
2852 // Use a shuffle to perform the truncation
2853 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2854 DAG.getConstant(maskHigh, MVT::i32),
2855 DAG.getConstant(maskLow, MVT::i32),
2856 DAG.getConstant(maskHigh, MVT::i32),
2857 DAG.getConstant(maskLow, MVT::i32));
2859 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2860 PromoteScalar, PromoteScalar, shufMask);
2862 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2863 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2866 //! Custom (target-specific) lowering entry point
2868   This is where LLVM's DAG selection process calls to do target-specific lowering of nodes.
2872 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2874 unsigned Opc = (unsigned) Op.getOpcode();
2875 MVT VT = Op.getValueType();
2879 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2880 cerr << "Op.getOpcode() = " << Opc << "\n";
2881 cerr << "*Op.getNode():\n";
2882 Op.getNode()->dump();
2889 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2891 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2892 case ISD::ConstantPool:
2893 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2894 case ISD::GlobalAddress:
2895 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2896 case ISD::JumpTable:
2897 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2899 return LowerConstant(Op, DAG);
2900 case ISD::ConstantFP:
2901 return LowerConstantFP(Op, DAG);
2903 return LowerBRCOND(Op, DAG);
2904 case ISD::FORMAL_ARGUMENTS:
2905 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2907 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2909 return LowerRET(Op, DAG, getTargetMachine());
2912 // i8, i64 math ops:
2913 case ISD::ZERO_EXTEND:
2914 case ISD::SIGN_EXTEND:
2915 case ISD::ANY_EXTEND:
2924 return LowerI8Math(Op, DAG, Opc);
2925 else if (VT == MVT::i64)
2926 return LowerI64Math(Op, DAG, Opc);
2930 // Vector-related lowering.
2931 case ISD::BUILD_VECTOR:
2932 return LowerBUILD_VECTOR(Op, DAG);
2933 case ISD::SCALAR_TO_VECTOR:
2934 return LowerSCALAR_TO_VECTOR(Op, DAG);
2935 case ISD::VECTOR_SHUFFLE:
2936 return LowerVECTOR_SHUFFLE(Op, DAG);
2937 case ISD::EXTRACT_VECTOR_ELT:
2938 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2939 case ISD::INSERT_VECTOR_ELT:
2940 return LowerINSERT_VECTOR_ELT(Op, DAG);
2942 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2946 return LowerByteImmed(Op, DAG);
2948 // Vector and i8 multiply:
2951 return LowerVectorMUL(Op, DAG);
2952 else if (VT == MVT::i8)
2953 return LowerI8Math(Op, DAG, Opc);
2955 return LowerMUL(Op, DAG, VT, Opc);
2958 if (VT == MVT::f32 || VT == MVT::v4f32)
2959 return LowerFDIVf32(Op, DAG);
2961 // This is probably a libcall
2962 else if (Op.getValueType() == MVT::f64)
2963 return LowerFDIVf64(Op, DAG);
2966 assert(0 && "Calling FDIV on unsupported MVT");
2969 return LowerCTPOP(Op, DAG);
2971 case ISD::SELECT_CC:
2972 return LowerSELECT_CC(Op, DAG);
2975 return LowerTRUNCATE(Op, DAG);
2981 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2982 SmallVectorImpl<SDValue>&Results,
2986 unsigned Opc = (unsigned) N->getOpcode();
2987 MVT OpVT = N->getValueType(0);
2991 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2992 cerr << "Op.getOpcode() = " << Opc << "\n";
2993 cerr << "*Op.getNode():\n";
3001 /* Otherwise, return unchanged */
3004 //===----------------------------------------------------------------------===//
3005 // Target Optimization Hooks
3006 //===----------------------------------------------------------------------===//
3009 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
3012 TargetMachine &TM = getTargetMachine();
3014 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
3015 SelectionDAG &DAG = DCI.DAG;
3016 SDValue Op0 = N->getOperand(0); // everything has at least one operand
3017 MVT NodeVT = N->getValueType(0); // The node's value type
3018 MVT Op0VT = Op0.getValueType(); // The first operand's result
3019 SDValue Result; // Initially, empty result
3021 switch (N->getOpcode()) {
3024 SDValue Op1 = N->getOperand(1);
3026 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
3027 SDValue Op01 = Op0.getOperand(1);
3028 if (Op01.getOpcode() == ISD::Constant
3029 || Op01.getOpcode() == ISD::TargetConstant) {
3030         // (add (SPUindirect <arg>, <const>), <const>) ->
3031 // (SPUindirect <arg>, <const + const>)
3032 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
3033 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
3034 SDValue combinedConst =
3035 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
3037 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
3038 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
3039 DEBUG(cerr << "With: (SPUindirect <arg>, "
3040 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
3041 return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
3042 Op0.getOperand(0), combinedConst);
3044 } else if (isa<ConstantSDNode>(Op0)
3045 && Op1.getOpcode() == SPUISD::IndirectAddr) {
3046 SDValue Op11 = Op1.getOperand(1);
3047 if (Op11.getOpcode() == ISD::Constant
3048 || Op11.getOpcode() == ISD::TargetConstant) {
3049         // (add <const>, (SPUindirect <arg>, <const>)) ->
3050 // (SPUindirect <arg>, <const + const>)
3051 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
3052 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
3053 SDValue combinedConst =
3054 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
3056 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
3057 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
3058 DEBUG(cerr << "With: (SPUindirect <arg>, "
3059 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
3061 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
3062 Op1.getOperand(0), combinedConst);
3067 case ISD::SIGN_EXTEND:
3068 case ISD::ZERO_EXTEND:
3069 case ISD::ANY_EXTEND: {
3070 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
3071 // (any_extend (SPUextract_elt0 <arg>)) ->
3072 // (SPUextract_elt0 <arg>)
3073 // Types must match, however...
3074 DEBUG(cerr << "Replace: ");
3075 DEBUG(N->dump(&DAG));
3076 DEBUG(cerr << "\nWith: ");
3077 DEBUG(Op0.getNode()->dump(&DAG));
3078 DEBUG(cerr << "\n");
3084 case SPUISD::IndirectAddr: {
3085 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
3086 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
3087 if (CN->getZExtValue() == 0) {
3088 // (SPUindirect (SPUaform <addr>, 0), 0) ->
3089 // (SPUaform <addr>, 0)
3091 DEBUG(cerr << "Replace: ");
3092 DEBUG(N->dump(&DAG));
3093 DEBUG(cerr << "\nWith: ");
3094 DEBUG(Op0.getNode()->dump(&DAG));
3095 DEBUG(cerr << "\n");
3102 case SPUISD::SHLQUAD_L_BITS:
3103 case SPUISD::SHLQUAD_L_BYTES:
3104 case SPUISD::VEC_SHL:
3105 case SPUISD::VEC_SRL:
3106 case SPUISD::VEC_SRA:
3107 case SPUISD::ROTQUAD_RZ_BYTES:
3108 case SPUISD::ROTQUAD_RZ_BITS: {
3109 SDValue Op1 = N->getOperand(1);
3111 if (isa<ConstantSDNode>(Op1)) {
3112 // Kill degenerate vector shifts:
3113 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
3114 if (CN->getZExtValue() == 0) {
3120 case SPUISD::PROMOTE_SCALAR: {
3121 switch (Op0.getOpcode()) {
3124 case ISD::ANY_EXTEND:
3125 case ISD::ZERO_EXTEND:
3126 case ISD::SIGN_EXTEND: {
3127       // (SPUpromote_scalar (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> <arg>,
3129 // but only if the SPUpromote_scalar and <arg> types match.
3130 SDValue Op00 = Op0.getOperand(0);
3131 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
3132 SDValue Op000 = Op00.getOperand(0);
3133 if (Op000.getValueType() == NodeVT) {
3139 case SPUISD::VEC2PREFSLOT: {
3140     // (SPUpromote_scalar (SPUvec2prefslot <arg>)) -> <arg>
3142 Result = Op0.getOperand(0);
3149 // Otherwise, return unchanged.
3151 if (Result.getNode()) {
3152 DEBUG(cerr << "\nReplace.SPU: ");
3153 DEBUG(N->dump(&DAG));
3154 DEBUG(cerr << "\nWith: ");
3155 DEBUG(Result.getNode()->dump(&DAG));
3156 DEBUG(cerr << "\n");
3163 //===----------------------------------------------------------------------===//
3164 // Inline Assembly Support
3165 //===----------------------------------------------------------------------===//
3167 /// getConstraintType - Given a constraint letter, return the type of
3168 /// constraint it is for this target.
3169 SPUTargetLowering::ConstraintType
3170 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3171 if (ConstraintLetter.size() == 1) {
3172 switch (ConstraintLetter[0]) {
3179 return C_RegisterClass;
3182 return TargetLowering::getConstraintType(ConstraintLetter);
3185 std::pair<unsigned, const TargetRegisterClass*>
3186 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3189 if (Constraint.size() == 1) {
3190 // GCC RS6000 Constraint Letters
3191 switch (Constraint[0]) {
3195 return std::make_pair(0U, SPU::R64CRegisterClass);
3196 return std::make_pair(0U, SPU::R32CRegisterClass);
3199 return std::make_pair(0U, SPU::R32FPRegisterClass);
3200 else if (VT == MVT::f64)
3201 return std::make_pair(0U, SPU::R64FPRegisterClass);
3204 return std::make_pair(0U, SPU::GPRCRegisterClass);
3208 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3211 //! Compute used/known bits for a SPU operand
3213 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3217 const SelectionDAG &DAG,
3218 unsigned Depth ) const {
3220 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
3223 switch (Op.getOpcode()) {
3225 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3235 case SPUISD::PROMOTE_SCALAR: {
3236 SDValue Op0 = Op.getOperand(0);
3237 MVT Op0VT = Op0.getValueType();
3238 unsigned Op0VTBits = Op0VT.getSizeInBits();
3239 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3240 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3241 KnownOne |= APInt(Op0VTBits, InMask, false);
3245 case SPUISD::LDRESULT:
3246 case SPUISD::VEC2PREFSLOT:
3247 case SPUISD::VEC2PREFSLOT_CHAINED: {
3248 MVT OpVT = Op.getValueType();
3249 unsigned OpVTBits = OpVT.getSizeInBits();
3250 uint64_t InMask = OpVT.getIntegerVTBitMask();
3251 KnownZero |= APInt(OpVTBits, ~InMask, false);
3252 KnownOne |= APInt(OpVTBits, InMask, false);
3257 case EXTRACT_I1_ZEXT:
3258 case EXTRACT_I1_SEXT:
3259 case EXTRACT_I8_ZEXT:
3260 case EXTRACT_I8_SEXT:
3265 case SPUISD::SHLQUAD_L_BITS:
3266 case SPUISD::SHLQUAD_L_BYTES:
3267 case SPUISD::VEC_SHL:
3268 case SPUISD::VEC_SRL:
3269 case SPUISD::VEC_SRA:
3270 case SPUISD::VEC_ROTL:
3271 case SPUISD::VEC_ROTR:
3272 case SPUISD::ROTQUAD_RZ_BYTES:
3273 case SPUISD::ROTQUAD_RZ_BITS:
3274 case SPUISD::ROTBYTES_LEFT:
3275 case SPUISD::ROTBYTES_LEFT_CHAINED:
3276 case SPUISD::SELECT_MASK:
3278 case SPUISD::FPInterp:
3279 case SPUISD::FPRecipEst:
3280 case SPUISD::SEXT32TO64:
3285 // LowerAsmOperandForConstraint
3287 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3288 char ConstraintLetter,
3290 std::vector<SDValue> &Ops,
3291 SelectionDAG &DAG) const {
3292 // Default, for the time being, to the base class handler
3293 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3297 /// isLegalAddressImmediate - Return true if the integer value can be used
3298 /// as the offset of the target addressing mode.
3299 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3300 const Type *Ty) const {
3301 // SPU's addresses are 256K:
3302 return (V > -(1 << 18) && V < (1 << 18) - 1);
3305 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3310 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3311 // The SPU target isn't yet aware of offsets.