//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

// Used in getTargetNodeName() below
std::map<unsigned, const char *> node_names;

//! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
  const int prefslot_byte;

const valtype_map_s valtype_map[] = {

const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  const valtype_map_s *retval = 0;

  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;

  cerr << "getValueTypeMapEntry returns NULL for "

//! Predicate that returns true if operand is a memory target
/*!
  \arg Op Operand to test
  \return true if the operand is a memory target (i.e., global
  address, external symbol, constant pool) or an A-form
  address.
 */
bool isMemoryOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::GlobalAddress
          || Opc == ISD::GlobalTLSAddress
          || Opc == ISD::JumpTable
          || Opc == ISD::ConstantPool
          || Opc == ISD::ExternalSymbol
          || Opc == ISD::TargetGlobalAddress
          || Opc == ISD::TargetGlobalTLSAddress
          || Opc == ISD::TargetJumpTable
          || Opc == ISD::TargetConstantPool
          || Opc == ISD::TargetExternalSymbol
          || Opc == SPUISD::AFormAddr);

//! Predicate that returns true if the operand is an indirect target
bool isIndirectOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::Register
          || Opc == SPUISD::LDRESULT);

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8, MVT::i1, Promote);
  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8, MVT::i8, Custom);
  setTruncStoreAction(MVT::i16, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i64, MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // SPU has no single-instruction square root; expand FSQRT
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does have CTLZ support for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Promote);
  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be custom lowered.
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, VT, Custom);

    setOperationAction(ISD::AND, VT, Legal);
    setOperationAction(ISD::OR, VT, Legal);
    setOperationAction(ISD::XOR, VT, Legal);
    setOperationAction(ISD::LOAD, VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
      = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  return (VT.isInteger() ? VT : MVT(MVT::i32));

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (divisible by 16, modulo 16 == 0)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot (modulo 16 != 0)
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store is encountered
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
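      // (The "preferred slot" is the sub-field of a 16-byte SPU register where
      // a scalar of the given type normally lives: byte 3 for an i8, bytes 2-3
      // for an i16, bytes 0-3 for word-sized values, as recorded in
      // valtype_map's prefslot_byte entries.)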
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();

    prefSlotOffs = -vtm->prefslot_byte;
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);

    prefSlotOffs = -vtm->prefslot_byte;

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));

    // Emit the vector load:
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));

  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);

/// Custom lower loads for CellSPU
/*!
  All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
  within a 16-byte block, we have to rotate to extract the requested element.
 */
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.getNode()->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);

    if (result.getNode() == 0)

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
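      // Rotate the loaded quadword left so that the addressed element lands in
      // its preferred slot.  When the base pointer was 16-byte aligned the
      // rotate amount is a compile-time constant; otherwise it has to be
      // computed from the runtime base pointer below.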
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

        Ops[2] = DAG.getConstant(rotamt, MVT::i16);

        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
        vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);

      // Handle the sign and zero-extending loads for i1 and i8:
      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);

      result = DAG.getNode(NewOpC, OpVT, result);

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDValue retops[2] = {

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));

  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";

/// Custom lower stores for CellSPU
/*!
  All CellSPU stores are aligned to 16-byte boundaries, so for elements
  within a 16-byte block, we have to generate a shuffle to insert the
  requested element into its place, then store the resulting block.
 */
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();

        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      theValue = theValue.getOperand(0);

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);

    SDValue insertEltOp =
      DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
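    // INSERT_MASK (selected to one of the C*D "generate controls for insertion"
    // instructions) produces the control word that tells SHUFB where the scalar
    // belongs; SHUFB then merges the splatted scalar into the quadword that was
    // loaded from memory, and the merged quadword is stored back below.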
    result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
#if 0 && defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      cerr << "------- CellSPU:LowerStore result:\n";

      DAG.setRoot(currentRoot);
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
    cerr << (unsigned) SN->getAddressingMode() << "\n";

/// Generate the address of a constant pool entry.
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);

      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

         "LowerConstantPool: Relocation model other than static"

LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);

      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

         "LowerJumpTable: Relocation model other than static not supported.");

LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);

      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

    cerr << "LowerGlobalAddress: Relocation model other than static not "

//! Custom lower i64 integer constants
/*!
  This code inserts all of the necessary juggling that needs to occur to load
  a 64-bit constant into a register.
 */
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());

  if (VT == MVT::i64) {
    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

    cerr << "LowerConstant: unhandled constant type "

//! Custom lower double precision floating point constants
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));

//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();

  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),

  return SDValue();                // Unchanged

LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
        ArgRegClass = &SPU::R8CRegClass;
        ArgRegClass = &SPU::R16CRegClass;
        ArgRegClass = &SPU::R32CRegClass;
        ArgRegClass = &SPU::R64CRegClass;
        ArgRegClass = &SPU::R32FPRegClass;
        ArgRegClass = &SPU::R64FPRegClass;
        ArgRegClass = &SPU::VECREGRegClass;

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);

      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;

    ArgValues.push_back(ArgVal);

    Root = ArgVal.getOperand(0);

  // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
  // We will spill (79-3)+1 registers to the stack
  SmallVector<SDValue, 79-3+1> MemOps;

  // Create the frame slot
  for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
    VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
    SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
    Root = Store.getOperand(0);
    MemOps.push_back(Store);

    // Increment address by stack slot size for the next stored argument
    ArgOffset += StackSlotSize;

  if (!MemOps.empty())
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();

LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));

        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;

      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));

        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;

      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));

        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
    InFlag = Chain.getValue(1);

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls.  This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);

      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);

  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);

      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);

    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);

    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);

    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);

  // If the function returns void, just return the chain.
  if (NumResults == 0)

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());

LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());

  SDValue Chain = Op.getOperand(0);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);

    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);

    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

  return 0; // All UNDEF: use implicit def.; not Constant node

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
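      // For an i64 splat the two 32-bit halves must be identical; the range
      // check below is then performed on just one of the halves.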
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      Value = Value >> 32;

    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      Value = Value >> 32;

    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      Value = Value >> 32;

    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Use the full 64-bit value; casting to unsigned would truncate it.
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);
    unsigned PartNo = i >= e/2;              // In the upper half of the 128 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
      // Nonconstant element.
    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            uint64_t &SplatBits, uint64_t &SplatUndef,
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
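  // The approach: repeatedly fold the value in half (128 -> 64 -> 32 -> 16
  // bits).  At each level the two halves must agree, ignoring undef bits, for
  // a smaller splat to exist; the first level at which they disagree
  // determines SplatSize.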

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the top 16-bits are different than the lower 16-bits, ignoring
          // undefs, we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are different than the lower 8-bits, ignoring
              // undefs, we have an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);

              SplatUndef = Undef16;

          SplatUndef = Undef32;

    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];

  return false;  // Can't be a splat if two pieces don't match.

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;

  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));

    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));

    unsigned short Value16;
      Value16 = (unsigned short) (SplatBits & 0xffff);
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);

    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);

    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et. al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);

      SmallVector<SDValue, 16> ShufBytes;
      bool upper_special, lower_special;

      // NOTE: This code creates common-case shuffle masks that can be easily
      // detected as common expressions.  It is not attempting to create highly
      // specialized masks to replace any and all 0's, 0xff's and 0x80's.

      // Detect if the upper or lower half is a special shuffle mask pattern:
      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

      // Create lower vector if not a special pattern
      if (!lower_special) {
        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       LO32C, LO32C, LO32C, LO32C));

      // Create upper vector if not a special pattern
      if (!upper_special) {
        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                       HI32C, HI32C, HI32C, HI32C));

      // If either upper or lower are special, then the two input operands are
      // the same (basically, one of them is a "don't care")

      if (lower_special && upper_special) {
        // Unhappy situation... both upper and lower are special, so punt with
        // a target constant:
        SDValue Zero = DAG.getConstant(0, MVT::i32);
        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;

          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
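            // Special-pattern halves are synthesized via shufb's special
            // control bytes instead of being read from an input register:
            // control bytes 0x80-0xbf produce 0x00, 0xc0-0xdf produce 0xff,
            // and 0xe0-0xff produce 0x80 in the corresponding result byte.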
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
            val |= i * 4 + j + ((i & 1) * 16);

          ShufBytes.push_back(DAG.getConstant(val, MVT::i32));

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
1717 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1718 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1719 /// permutation vector, V3, is monotonically increasing with one "exception"
1720 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1721 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1722 /// In either case, the net result is going to eventually invoke SHUFB to
1723 /// permute/shuffle the bytes from V1 and V2.
1725 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1726 /// generate the control word for byte/halfword/word insertion. This takes
1727 /// care of a single element move from V2 into V1.
1729 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
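///
/// For example (illustrative, not from the original comments): for v4i32 a
/// permutation mask of <0, 1, 6, 3> is monotonic with a single exception at
/// position 2, where element 6 (element 2 of V2) is inserted into V1; that
/// maps onto the INSERT_MASK + SHUFB path. A mask such as <3, 2, 1, 0> is
/// not monotonic and falls back to building a full byte-level shuffle mask.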
1730 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1731 SDValue V1 = Op.getOperand(0);
1732 SDValue V2 = Op.getOperand(1);
1733 SDValue PermMask = Op.getOperand(2);
1735 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1737 // If we have a single element being moved from V2 into V1, this can be handled
1738 // using the C*[DX] compute mask instructions, but the vector elements have
1739 // to be monotonically increasing with one exception element.
1740 MVT EltVT = V1.getValueType().getVectorElementType();
1741 unsigned EltsFromV2 = 0;
1743 unsigned V2EltIdx0 = 0;
1744 unsigned CurrElt = 0;
1745 bool monotonic = true;
1746 if (EltVT == MVT::i8)
1748 else if (EltVT == MVT::i16)
1750 else if (EltVT == MVT::i32)
1753 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1755 for (unsigned i = 0, e = PermMask.getNumOperands();
1756 EltsFromV2 <= 1 && monotonic && i != e;
1759 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1762 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1764 if (SrcElt >= V2EltIdx0) {
1766 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1767 } else if (CurrElt != SrcElt) {
1774 if (EltsFromV2 == 1 && monotonic) {
1775 // Compute mask and shuffle
1776 MachineFunction &MF = DAG.getMachineFunction();
1777 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1778 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1779 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1780 // Initialize temporary register to 0
1781 SDValue InitTempReg =
1782 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1783 // Copy register's contents as index in INSERT_MASK:
1784 SDValue ShufMaskOp =
1785 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1786 DAG.getTargetConstant(V2Elt, MVT::i32),
1787 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1788 // Use shuffle mask in SHUFB synthetic instruction:
1789 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1791 // Convert the SHUFFLE_VECTOR mask's input element units to the
1793 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1795 SmallVector<SDValue, 16> ResultMask;
1796 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1798 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1801 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1803 for (unsigned j = 0; j < BytesPerElement; ++j) {
1804 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1809 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1810 &ResultMask[0], ResultMask.size());
1811 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1815 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1816 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1818 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1819 // For a constant, build the appropriate constant vector, which will
1820 // eventually simplify to a vector register load.
1822 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1823 SmallVector<SDValue, 16> ConstVecValues;
1827 // Create a constant vector:
1828 switch (Op.getValueType().getSimpleVT()) {
1829 default: assert(0 && "Unexpected constant value type in "
1830 "LowerSCALAR_TO_VECTOR");
1831 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1832 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1833 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1834 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1835 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1836 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1839 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1840 for (size_t j = 0; j < n_copies; ++j)
1841 ConstVecValues.push_back(CValue);
1843 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1844 &ConstVecValues[0], ConstVecValues.size());
1846 // Otherwise, copy the value from one register to another:
1847 switch (Op0.getValueType().getSimpleVT()) {
1848 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1855 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1862 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1863 switch (Op.getValueType().getSimpleVT()) {
1865 cerr << "CellSPU: Unknown vector multiplication, got "
1866 << Op.getValueType().getMVTString()
1872 SDValue rA = Op.getOperand(0);
1873 SDValue rB = Op.getOperand(1);
1874 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1875 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1876 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1877 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1879 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1883 // Multiply two v8i16 vectors (pipeline friendly version):
1884 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1885 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1886 // c) Use SELB to select upper and lower halves from the intermediate results
1888 // NOTE: We really want to move the SELECT_MASK earlier to actually get the
1889 // dual-issue; this code does manage to do that.
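//
// Rough scalar picture of the DAG below (an assumed reading of the SPU
// nodes, added for orientation): each 32-bit word holds two i16 lanes
// {hi, lo}. MPYHH forms hi(a)*hi(b) and MPY forms lo(a)*lo(b); shifting the
// MPYHH product left by 16 moves its low halfword into the hi-lane slot, and
// SELB under the FSMBI(0xcccc) mask (bytes FF FF 00 00 per word) takes the
// hi lane from the shifted product and the lo lane from the MPY product.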
1892 MachineFunction &MF = DAG.getMachineFunction();
1893 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1894 SDValue Chain = Op.getOperand(0);
1895 SDValue rA = Op.getOperand(0);
1896 SDValue rB = Op.getOperand(1);
1897 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1898 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1901 DAG.getCopyToReg(Chain, FSMBIreg,
1902 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1903 DAG.getConstant(0xcccc, MVT::i16)));
1906 DAG.getCopyToReg(FSMBOp, HiProdReg,
1907 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1909 SDValue HHProd_v4i32 =
1910 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1911 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1913 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1914 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1915 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1916 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1918 DAG.getConstant(16, MVT::i16))),
1919 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1922 // This M00sE is N@stI! (apologies to Monty Python)
1924 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1925 // is to break it all apart, sign extend, and reassemble the various
1926 // intermediate products.
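//
// In outline (a hedged reading of the code below, not an authoritative
// description): each halfword packs two byte lanes; the products for the two
// lanes are formed separately with halfword multiplies after shifting /
// sign-extending the bytes into multiplication position, SELB with the
// FSMBI(0x2222) mask interleaves them back per byte, and the low and high
// halves of the final v16i8 result are recombined with a mask/shift/OR
// sequence.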
1928 SDValue rA = Op.getOperand(0);
1929 SDValue rB = Op.getOperand(1);
1930 SDValue c8 = DAG.getConstant(8, MVT::i32);
1931 SDValue c16 = DAG.getConstant(16, MVT::i32);
1934 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1935 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1936 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1938 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1940 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1943 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1944 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1946 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1947 DAG.getConstant(0x2222, MVT::i16));
1949 SDValue LoProdParts =
1950 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1951 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1952 LLProd, LHProd, FSMBmask));
1954 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1957 DAG.getNode(ISD::AND, MVT::v4i32,
1959 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1960 LoProdMask, LoProdMask,
1961 LoProdMask, LoProdMask));
1964 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1965 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1968 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1969 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1972 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1974 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1977 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1978 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1979 DAG.getNode(SPUISD::VEC_SRA,
1980 MVT::v4i32, rAH, c8)),
1981 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1982 DAG.getNode(SPUISD::VEC_SRA,
1983 MVT::v4i32, rBH, c8)));
1986 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1988 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1992 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
1994 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
1995 DAG.getNode(ISD::OR, MVT::v4i32,
2003 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2004 MachineFunction &MF = DAG.getMachineFunction();
2005 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2007 SDValue A = Op.getOperand(0);
2008 SDValue B = Op.getOperand(1);
2009 MVT VT = Op.getValueType();
2011 unsigned VRegBR, VRegC;
2013 if (VT == MVT::f32) {
2014 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2015 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2017 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2018 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2020 // TODO: make sure we're feeding FPInterp the right arguments
2021 // Right now: fi B, frest(B)
2024 // (Floating Interpolate (FP Reciprocal Estimate B))
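//
// Added note: the sequence below is one Newton-Raphson refinement folded into
// the quotient. With y0 = fi(B, frest(B)) ~= 1/B and q0 = A * y0, it returns
//   q0 + y0 * (A - B * q0)  ==  A * y0 * (2 - B * y0),
// which roughly squares the relative error of the reciprocal estimate.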
2026 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2027 DAG.getNode(SPUISD::FPInterp, VT, B,
2028 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2030 // Computes A * BRcpl and stores in a temporary register
2032 DAG.getCopyToReg(BRcpl, VRegC,
2033 DAG.getNode(ISD::FMUL, VT, A,
2034 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2035 // What does the Chain variable do? It's magic!
2036 // TODO: set Chain = Op(0).getEntryNode()
2038 return DAG.getNode(ISD::FADD, VT,
2039 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2040 DAG.getNode(ISD::FMUL, VT,
2041 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2042 DAG.getNode(ISD::FSUB, VT, A,
2043 DAG.getNode(ISD::FMUL, VT, B,
2044 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2047 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2048 MVT VT = Op.getValueType();
2049 SDValue N = Op.getOperand(0);
2050 SDValue Elt = Op.getOperand(1);
2051 SDValue ShufMask[16];
2052 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2054 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2056 int EltNo = (int) C->getZExtValue();
2059 if (VT == MVT::i8 && EltNo >= 16)
2060 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2061 else if (VT == MVT::i16 && EltNo >= 8)
2062 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2063 else if (VT == MVT::i32 && EltNo >= 4)
2064 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2065 else if (VT == MVT::i64 && EltNo >= 2)
2066 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2068 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2069 // i32 and i64: Element 0 is the preferred slot
2070 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2073 // Need to generate shuffle mask and extract:
2074 int prefslot_begin = -1, prefslot_end = -1;
2075 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2077 switch (VT.getSimpleVT()) {
2079 assert(false && "Invalid value type!");
2081 prefslot_begin = prefslot_end = 3;
2085 prefslot_begin = 2; prefslot_end = 3;
2090 prefslot_begin = 0; prefslot_end = 3;
2095 prefslot_begin = 0; prefslot_end = 7;
2100 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2101 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2103 for (int i = 0; i < 16; ++i) {
2104 // zero fill upper part of preferred slot, don't care about the
2106 unsigned int mask_val;
2108 if (i <= prefslot_end) {
2110 ((i < prefslot_begin)
2112 : elt_byte + (i - prefslot_begin));
2114 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2116 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2119 SDValue ShufMaskVec =
2120 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2122 sizeof(ShufMask) / sizeof(ShufMask[0]));
2124 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2125 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2126 N, N, ShufMaskVec));
2130 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2131 SDValue VecOp = Op.getOperand(0);
2132 SDValue ValOp = Op.getOperand(1);
2133 SDValue IdxOp = Op.getOperand(2);
2134 MVT VT = Op.getValueType();
2136 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2137 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2139 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2140 // Use $2 because it's always 16-byte aligned and it's available:
2141 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2144 DAG.getNode(SPUISD::SHUFB, VT,
2145 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2147 DAG.getNode(SPUISD::INSERT_MASK, VT,
2148 DAG.getNode(ISD::ADD, PtrVT,
2150 DAG.getConstant(CN->getZExtValue(),
2156 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2158 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2160 assert(Op.getValueType() == MVT::i8);
2163 assert(0 && "Unhandled i8 math operator");
2167 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2169 SDValue N1 = Op.getOperand(1);
2170 N0 = (N0.getOpcode() != ISD::Constant
2171 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2172 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2174 N1 = (N1.getOpcode() != ISD::Constant
2175 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2176 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2178 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2179 DAG.getNode(Opc, MVT::i16, N0, N1));
2183 SDValue N1 = Op.getOperand(1);
2185 N0 = (N0.getOpcode() != ISD::Constant
2186 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2187 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2189 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2192 N1 = (N1.getOpcode() != ISD::Constant
2193 ? DAG.getNode(N1Opc, MVT::i32, N1)
2194 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2197 DAG.getNode(ISD::OR, MVT::i16, N0,
2198 DAG.getNode(ISD::SHL, MVT::i16,
2199 N0, DAG.getConstant(8, MVT::i32)));
2200 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2201 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2205 SDValue N1 = Op.getOperand(1);
2207 N0 = (N0.getOpcode() != ISD::Constant
2208 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2209 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2211 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2214 N1 = (N1.getOpcode() != ISD::Constant
2215 ? DAG.getNode(N1Opc, MVT::i16, N1)
2216 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2218 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2219 DAG.getNode(Opc, MVT::i16, N0, N1));
2222 SDValue N1 = Op.getOperand(1);
2224 N0 = (N0.getOpcode() != ISD::Constant
2225 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2226 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2228 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2231 N1 = (N1.getOpcode() != ISD::Constant
2232 ? DAG.getNode(N1Opc, MVT::i16, N1)
2233 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2235 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2236 DAG.getNode(Opc, MVT::i16, N0, N1));
2239 SDValue N1 = Op.getOperand(1);
2241 N0 = (N0.getOpcode() != ISD::Constant
2242 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2243 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2245 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2246 N1 = (N1.getOpcode() != ISD::Constant
2247 ? DAG.getNode(N1Opc, MVT::i16, N1)
2248 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2250 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2251 DAG.getNode(Opc, MVT::i16, N0, N1));
2259 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2261 MVT VT = Op.getValueType();
2262 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2264 SDValue Op0 = Op.getOperand(0);
2267 case ISD::ZERO_EXTEND:
2268 case ISD::SIGN_EXTEND:
2269 case ISD::ANY_EXTEND: {
2270 MVT Op0VT = Op0.getValueType();
2271 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2273 assert(Op0VT == MVT::i32
2274 && "CellSPU: Zero/sign extending something other than i32");
2275 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2277 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2278 ? SPUISD::ROTBYTES_RIGHT_S
2279 : SPUISD::ROTQUAD_RZ_BYTES);
2280 SDValue PromoteScalar =
2281 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2283 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2284 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2285 DAG.getNode(NewOpc, Op0VecVT,
2287 DAG.getConstant(4, MVT::i32))));
2291 // Turn operands into vectors to satisfy type checking (shufb works on
2294 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2296 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2297 SmallVector<SDValue, 16> ShufBytes;
2299 // Create the shuffle mask for "rotating" the carry up one register slot
2300 // once the carry is generated.
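// (Added note: these are shufb control words. 0x80808080 emits zero bytes,
// while 0x04050607 and 0x0c0d0e0f copy source bytes 4-7 and 12-15, moving the
// carry computed for the low 32-bit word of each doubleword up into the high
// word's slot, where ADD_EXTENDED consumes it.)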
2301 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2302 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2303 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2304 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2307 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2308 SDValue ShiftedCarry =
2309 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2311 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2312 &ShufBytes[0], ShufBytes.size()));
2314 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2315 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2316 Op0, Op1, ShiftedCarry));
2320 // Turn operands into vectors to satisfy type checking (shufb works on
2323 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2325 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2326 SmallVector<SDValue, 16> ShufBytes;
2328 // Create the shuffle mask for "rotating" the borrow up one register slot
2329 // once the borrow is generated.
2330 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2331 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2332 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2333 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2336 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2337 SDValue ShiftedBorrow =
2338 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2339 BorrowGen, BorrowGen,
2340 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2341 &ShufBytes[0], ShufBytes.size()));
2343 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2344 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2345 Op0, Op1, ShiftedBorrow));
2349 SDValue ShiftAmt = Op.getOperand(1);
2350 MVT ShiftAmtVT = ShiftAmt.getValueType();
2351 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2353 DAG.getNode(SPUISD::SELB, VecVT,
2355 DAG.getConstant(0, VecVT),
2356 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2357 DAG.getConstant(0xff00ULL, MVT::i16)));
2358 SDValue ShiftAmtBytes =
2359 DAG.getNode(ISD::SRL, ShiftAmtVT,
2361 DAG.getConstant(3, ShiftAmtVT));
2362 SDValue ShiftAmtBits =
2363 DAG.getNode(ISD::AND, ShiftAmtVT,
2365 DAG.getConstant(7, ShiftAmtVT));
2367 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2368 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2369 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2370 MaskLower, ShiftAmtBytes),
2375 MVT VT = Op.getValueType();
2376 SDValue ShiftAmt = Op.getOperand(1);
2377 MVT ShiftAmtVT = ShiftAmt.getValueType();
2378 SDValue ShiftAmtBytes =
2379 DAG.getNode(ISD::SRL, ShiftAmtVT,
2381 DAG.getConstant(3, ShiftAmtVT));
2382 SDValue ShiftAmtBits =
2383 DAG.getNode(ISD::AND, ShiftAmtVT,
2385 DAG.getConstant(7, ShiftAmtVT));
2387 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2388 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2389 Op0, ShiftAmtBytes),
2394 // Promote Op0 to vector
2396 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2397 SDValue ShiftAmt = Op.getOperand(1);
2398 MVT ShiftVT = ShiftAmt.getValueType();
2400 // Negate variable shift amounts
2401 if (!isa<ConstantSDNode>(ShiftAmt)) {
2402 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2403 DAG.getConstant(0, ShiftVT), ShiftAmt);
2406 SDValue UpperHalfSign =
2407 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2408 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2409 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2410 Op0, DAG.getConstant(31, MVT::i32))));
2411 SDValue UpperHalfSignMask =
2412 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2413 SDValue UpperLowerMask =
2414 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2415 DAG.getConstant(0xff00, MVT::i16));
2416 SDValue UpperLowerSelect =
2417 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2418 UpperHalfSignMask, Op0, UpperLowerMask);
2419 SDValue RotateLeftBytes =
2420 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2421 UpperLowerSelect, ShiftAmt);
2422 SDValue RotateLeftBits =
2423 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2424 RotateLeftBytes, ShiftAmt);
2426 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2434 //! Lower byte immediate operations for v16i8 vectors:
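//
// Added note: the byte-immediate forms (ANDBI/ORBI/XORBI) take a single 8-bit
// immediate that is replicated across all 16 bytes, which is why a splatted
// BUILD_VECTOR operand can be folded into target constants here.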
2436 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2439 MVT VT = Op.getValueType();
2441 ConstVec = Op.getOperand(0);
2442 Arg = Op.getOperand(1);
2443 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2444 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2445 ConstVec = ConstVec.getOperand(0);
2447 ConstVec = Op.getOperand(1);
2448 Arg = Op.getOperand(0);
2449 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2450 ConstVec = ConstVec.getOperand(0);
2455 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2456 uint64_t VectorBits[2];
2457 uint64_t UndefBits[2];
2458 uint64_t SplatBits, SplatUndef;
2461 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2462 && isConstantSplat(VectorBits, UndefBits,
2463 VT.getVectorElementType().getSizeInBits(),
2464 SplatBits, SplatUndef, SplatSize)) {
2466 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2467 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2469 // Turn the BUILD_VECTOR into a set of target constants:
2470 for (size_t i = 0; i < tcVecSize; ++i)
2473 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2474 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2477 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2478 // lowered. Return the operation rather than a null SDValue.
2482 //! Lower i32 multiplication
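//
// The i32 case rebuilds a 32x32 multiply from 16-bit multiply units (sketch,
// under the usual reading of these nodes; added for clarity):
//   a*b (mod 2^32) = mpyu(a, b)          // lo16(a) * lo16(b), full 32 bits
//                  + mpyh(a, b)          // (hi16(a) * lo16(b)) << 16
//                  + mpyh(b, a)          // (hi16(b) * lo16(a)) << 16
// The hi16*hi16 term only affects bits >= 32 and is dropped.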
2483 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2485 switch (VT.getSimpleVT()) {
2487 cerr << "CellSPU: Unknown LowerMUL value type, got "
2488 << Op.getValueType().getMVTString()
2494 SDValue rA = Op.getOperand(0);
2495 SDValue rB = Op.getOperand(1);
2497 return DAG.getNode(ISD::ADD, MVT::i32,
2498 DAG.getNode(ISD::ADD, MVT::i32,
2499 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2500 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2501 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2508 //! Custom lowering for CTPOP (count population)
2510 Custom lowering code that counts the number of ones in the input
2511 operand. SPU has such an instruction, but it counts the number of
2512 ones per byte, which then have to be accumulated.
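
  For the i32 flavour, the per-byte counts from CNTB are folded together with
  two shift-and-add steps (an illustrative scalar reading of the DAG below,
  added for clarity):

      x  = cntb(v);        // four byte counts packed into one word
      x += x >> 16;
      x += x >> 8;
      return x & 0xff;     // the total population count fits in 8 bits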
2514 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2515 MVT VT = Op.getValueType();
2516 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2518 switch (VT.getSimpleVT()) {
2520 assert(false && "Invalid value type!");
2522 SDValue N = Op.getOperand(0);
2523 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2525 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2526 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2528 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2532 MachineFunction &MF = DAG.getMachineFunction();
2533 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2535 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2537 SDValue N = Op.getOperand(0);
2538 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2539 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2540 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2542 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2543 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2545 // CNTB_result becomes the chain to which the virtual register
2546 // CNTB_reg becomes associated:
2547 SDValue CNTB_result =
2548 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2550 SDValue CNTB_rescopy =
2551 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2553 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2555 return DAG.getNode(ISD::AND, MVT::i16,
2556 DAG.getNode(ISD::ADD, MVT::i16,
2557 DAG.getNode(ISD::SRL, MVT::i16,
2564 MachineFunction &MF = DAG.getMachineFunction();
2565 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2567 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2568 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2570 SDValue N = Op.getOperand(0);
2571 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2572 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2573 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2574 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2576 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2577 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2579 // CNTB_result becomes the chain to which all of the virtual registers
2580 // CNTB_reg, SUM1_reg become associated:
2581 SDValue CNTB_result =
2582 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2584 SDValue CNTB_rescopy =
2585 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2588 DAG.getNode(ISD::SRL, MVT::i32,
2589 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2592 DAG.getNode(ISD::ADD, MVT::i32,
2593 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2595 SDValue Sum1_rescopy =
2596 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2599 DAG.getNode(ISD::SRL, MVT::i32,
2600 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2603 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2604 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2606 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2616 /// LowerOperation - Provide custom lowering hooks for some operations.
2619 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2621 unsigned Opc = (unsigned) Op.getOpcode();
2622 MVT VT = Op.getValueType();
2626 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2627 cerr << "Op.getOpcode() = " << Opc << "\n";
2628 cerr << "*Op.getNode():\n";
2629 Op.getNode()->dump();
2635 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2637 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2638 case ISD::ConstantPool:
2639 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2640 case ISD::GlobalAddress:
2641 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2642 case ISD::JumpTable:
2643 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2645 return LowerConstant(Op, DAG);
2646 case ISD::ConstantFP:
2647 return LowerConstantFP(Op, DAG);
2649 return LowerBRCOND(Op, DAG);
2650 case ISD::FORMAL_ARGUMENTS:
2651 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2653 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2655 return LowerRET(Op, DAG, getTargetMachine());
2658 // i8, i64 math ops:
2659 case ISD::ZERO_EXTEND:
2660 case ISD::SIGN_EXTEND:
2661 case ISD::ANY_EXTEND:
2670 return LowerI8Math(Op, DAG, Opc);
2671 else if (VT == MVT::i64)
2672 return LowerI64Math(Op, DAG, Opc);
2676 // Vector-related lowering.
2677 case ISD::BUILD_VECTOR:
2678 return LowerBUILD_VECTOR(Op, DAG);
2679 case ISD::SCALAR_TO_VECTOR:
2680 return LowerSCALAR_TO_VECTOR(Op, DAG);
2681 case ISD::VECTOR_SHUFFLE:
2682 return LowerVECTOR_SHUFFLE(Op, DAG);
2683 case ISD::EXTRACT_VECTOR_ELT:
2684 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2685 case ISD::INSERT_VECTOR_ELT:
2686 return LowerINSERT_VECTOR_ELT(Op, DAG);
2688 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2692 return LowerByteImmed(Op, DAG);
2694 // Vector and i8 multiply:
2697 return LowerVectorMUL(Op, DAG);
2698 else if (VT == MVT::i8)
2699 return LowerI8Math(Op, DAG, Opc);
2701 return LowerMUL(Op, DAG, VT, Opc);
2704 if (VT == MVT::f32 || VT == MVT::v4f32)
2705 return LowerFDIVf32(Op, DAG);
2706 // else if (Op.getValueType() == MVT::f64)
2707 // return LowerFDIVf64(Op, DAG);
2709 assert(0 && "Calling FDIV on unsupported MVT");
2712 return LowerCTPOP(Op, DAG);
2718 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2721 unsigned Opc = (unsigned) N->getOpcode();
2722 MVT OpVT = N->getValueType(0);
2726 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2727 cerr << "Op.getOpcode() = " << Opc << "\n";
2728 cerr << "*Op.getNode():\n";
2736 /* Otherwise, return unchanged */
2740 //===----------------------------------------------------------------------===//
2741 // Target Optimization Hooks
2742 //===----------------------------------------------------------------------===//
2745 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2748 TargetMachine &TM = getTargetMachine();
2750 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2751 SelectionDAG &DAG = DCI.DAG;
2752 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2753 SDValue Result; // Initially, NULL result
2755 switch (N->getOpcode()) {
2758 SDValue Op1 = N->getOperand(1);
2760 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2761 SDValue Op01 = Op0.getOperand(1);
2762 if (Op01.getOpcode() == ISD::Constant
2763 || Op01.getOpcode() == ISD::TargetConstant) {
2764 // (add <const>, (SPUindirect <arg>, <const>)) ->
2765 // (SPUindirect <arg>, <const + const>)
2766 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2767 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2768 SDValue combinedConst =
2769 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2770 Op0.getValueType());
2772 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2773 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2774 DEBUG(cerr << "With: (SPUindirect <arg>, "
2775 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2776 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2777 Op0.getOperand(0), combinedConst);
2779 } else if (isa<ConstantSDNode>(Op0)
2780 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2781 SDValue Op11 = Op1.getOperand(1);
2782 if (Op11.getOpcode() == ISD::Constant
2783 || Op11.getOpcode() == ISD::TargetConstant) {
2784 // (add (SPUindirect <arg>, <const>), <const>) ->
2785 // (SPUindirect <arg>, <const + const>)
2786 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2787 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2788 SDValue combinedConst =
2789 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2790 Op0.getValueType());
2792 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2793 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2794 DEBUG(cerr << "With: (SPUindirect <arg>, "
2795 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2797 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2798 Op1.getOperand(0), combinedConst);
2803 case ISD::SIGN_EXTEND:
2804 case ISD::ZERO_EXTEND:
2805 case ISD::ANY_EXTEND: {
2806 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2807 N->getValueType(0) == Op0.getValueType()) {
2808 // (any_extend (SPUextract_elt0 <arg>)) ->
2809 // (SPUextract_elt0 <arg>)
2810 // Types must match, however...
2811 DEBUG(cerr << "Replace: ");
2812 DEBUG(N->dump(&DAG));
2813 DEBUG(cerr << "\nWith: ");
2814 DEBUG(Op0.getNode()->dump(&DAG));
2815 DEBUG(cerr << "\n");
2821 case SPUISD::IndirectAddr: {
2822 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2823 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2824 if (CN->getZExtValue() == 0) {
2825 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2826 // (SPUaform <addr>, 0)
2828 DEBUG(cerr << "Replace: ");
2829 DEBUG(N->dump(&DAG));
2830 DEBUG(cerr << "\nWith: ");
2831 DEBUG(Op0.getNode()->dump(&DAG));
2832 DEBUG(cerr << "\n");
2839 case SPUISD::SHLQUAD_L_BITS:
2840 case SPUISD::SHLQUAD_L_BYTES:
2841 case SPUISD::VEC_SHL:
2842 case SPUISD::VEC_SRL:
2843 case SPUISD::VEC_SRA:
2844 case SPUISD::ROTQUAD_RZ_BYTES:
2845 case SPUISD::ROTQUAD_RZ_BITS: {
2846 SDValue Op1 = N->getOperand(1);
2848 if (isa<ConstantSDNode>(Op1)) {
2849 // Kill degenerate vector shifts:
2850 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2852 if (CN->getZExtValue() == 0) {
2858 case SPUISD::PROMOTE_SCALAR: {
2859 switch (Op0.getOpcode()) {
2862 case ISD::ANY_EXTEND:
2863 case ISD::ZERO_EXTEND:
2864 case ISD::SIGN_EXTEND: {
2865 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2867 // but only if the SPUpromote_scalar and <arg> types match.
2868 SDValue Op00 = Op0.getOperand(0);
2869 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2870 SDValue Op000 = Op00.getOperand(0);
2871 if (Op000.getValueType() == N->getValueType(0)) {
2877 case SPUISD::EXTRACT_ELT0: {
2878 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2880 Result = Op0.getOperand(0);
2887 // Otherwise, return unchanged.
2889 if (Result.getNode()) {
2890 DEBUG(cerr << "\nReplace.SPU: ");
2891 DEBUG(N->dump(&DAG));
2892 DEBUG(cerr << "\nWith: ");
2893 DEBUG(Result.getNode()->dump(&DAG));
2894 DEBUG(cerr << "\n");
2901 //===----------------------------------------------------------------------===//
2902 // Inline Assembly Support
2903 //===----------------------------------------------------------------------===//
2905 /// getConstraintType - Given a constraint letter, return the type of
2906 /// constraint it is for this target.
2907 SPUTargetLowering::ConstraintType
2908 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2909 if (ConstraintLetter.size() == 1) {
2910 switch (ConstraintLetter[0]) {
2917 return C_RegisterClass;
2920 return TargetLowering::getConstraintType(ConstraintLetter);
2923 std::pair<unsigned, const TargetRegisterClass*>
2924 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2927 if (Constraint.size() == 1) {
2928 // GCC RS6000 Constraint Letters
2929 switch (Constraint[0]) {
2933 return std::make_pair(0U, SPU::R64CRegisterClass);
2934 return std::make_pair(0U, SPU::R32CRegisterClass);
2937 return std::make_pair(0U, SPU::R32FPRegisterClass);
2938 else if (VT == MVT::f64)
2939 return std::make_pair(0U, SPU::R64FPRegisterClass);
2942 return std::make_pair(0U, SPU::GPRCRegisterClass);
2946 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2949 //! Compute used/known bits for a SPU operand
2951 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2955 const SelectionDAG &DAG,
2956 unsigned Depth ) const {
2958 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2961 switch (Op.getOpcode()) {
2963 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2973 case SPUISD::PROMOTE_SCALAR: {
2974 SDValue Op0 = Op.getOperand(0);
2975 MVT Op0VT = Op0.getValueType();
2976 unsigned Op0VTBits = Op0VT.getSizeInBits();
2977 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2978 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2979 KnownOne |= APInt(Op0VTBits, InMask, false);
2983 case SPUISD::LDRESULT:
2984 case SPUISD::EXTRACT_ELT0:
2985 case SPUISD::EXTRACT_ELT0_CHAINED: {
2986 MVT OpVT = Op.getValueType();
2987 unsigned OpVTBits = OpVT.getSizeInBits();
2988 uint64_t InMask = OpVT.getIntegerVTBitMask();
2989 KnownZero |= APInt(OpVTBits, ~InMask, false);
2990 KnownOne |= APInt(OpVTBits, InMask, false);
2995 case EXTRACT_I1_ZEXT:
2996 case EXTRACT_I1_SEXT:
2997 case EXTRACT_I8_ZEXT:
2998 case EXTRACT_I8_SEXT:
3003 case SPUISD::SHLQUAD_L_BITS:
3004 case SPUISD::SHLQUAD_L_BYTES:
3005 case SPUISD::VEC_SHL:
3006 case SPUISD::VEC_SRL:
3007 case SPUISD::VEC_SRA:
3008 case SPUISD::VEC_ROTL:
3009 case SPUISD::VEC_ROTR:
3010 case SPUISD::ROTQUAD_RZ_BYTES:
3011 case SPUISD::ROTQUAD_RZ_BITS:
3012 case SPUISD::ROTBYTES_RIGHT_S:
3013 case SPUISD::ROTBYTES_LEFT:
3014 case SPUISD::ROTBYTES_LEFT_CHAINED:
3015 case SPUISD::SELECT_MASK:
3017 case SPUISD::FPInterp:
3018 case SPUISD::FPRecipEst:
3019 case SPUISD::SEXT32TO64:
3024 // LowerAsmOperandForConstraint
3026 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3027 char ConstraintLetter,
3029 std::vector<SDValue> &Ops,
3030 SelectionDAG &DAG) const {
3031 // Default, for the time being, to the base class handler
3032 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3036 /// isLegalAddressImmediate - Return true if the integer value can be used
3037 /// as the offset of the target addressing mode.
3038 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3039 const Type *Ty) const {
3040 // SPU local store addresses are limited to 256K:
3041 return (V > -(1 << 18) && V < (1 << 18) - 1);
3044 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3049 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3050 // The SPU target isn't yet aware of offsets.