1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170 // Custom lower BRCOND for i1, i8 to "promote" the result to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182 // PowerPC has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FSIN , MVT::f32, Expand);
193 setOperationAction(ISD::FCOS , MVT::f32, Expand);
194 setOperationAction(ISD::FREM , MVT::f32, Expand);
196 // If we're enabling GP optimizations, use hardware square root
197 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
198 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203 // SPU can do rotate right and left, so legalize it... but customize for i8
204 // because instructions don't exist.
206 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
208 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
209 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
210 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // And SPU needs custom lowering for shift left/right for i64
220 setOperationAction(ISD::SHL, MVT::i64, Custom);
221 setOperationAction(ISD::SRL, MVT::i64, Custom);
222 setOperationAction(ISD::SRA, MVT::i64, Custom);
224 // Custom lower i8, i32 and i64 multiplications
225 setOperationAction(ISD::MUL, MVT::i8, Custom);
226 setOperationAction(ISD::MUL, MVT::i32, Custom);
227 setOperationAction(ISD::MUL, MVT::i64, Custom);
229 // Need to custom handle (some) common i8, i64 math ops
230 setOperationAction(ISD::ADD, MVT::i64, Custom);
231 setOperationAction(ISD::SUB, MVT::i8, Custom);
232 setOperationAction(ISD::SUB, MVT::i64, Custom);
234 // SPU does not have BSWAP. It does have i32 support CTLZ.
235 // CTPOP has to be custom lowered.
236 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
237 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
239 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
240 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
241 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
242 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
244 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
245 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
247 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
249 // SPU has a version of select that implements (a&~c)|(b&c), just like
250 // select ought to work:
251 setOperationAction(ISD::SELECT, MVT::i1, Promote);
252 setOperationAction(ISD::SELECT, MVT::i8, Legal);
253 setOperationAction(ISD::SELECT, MVT::i16, Legal);
254 setOperationAction(ISD::SELECT, MVT::i32, Legal);
255 setOperationAction(ISD::SELECT, MVT::i64, Expand);
257 setOperationAction(ISD::SETCC, MVT::i1, Promote);
258 setOperationAction(ISD::SETCC, MVT::i8, Legal);
259 setOperationAction(ISD::SETCC, MVT::i16, Legal);
260 setOperationAction(ISD::SETCC, MVT::i32, Legal);
261 setOperationAction(ISD::SETCC, MVT::i64, Expand);
263 // Zero extension and sign extension for i64 have to be
265 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
266 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
267 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
269 // SPU has a legal FP -> signed INT instruction
270 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
273 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
275 // FDIV on SPU requires custom lowering
276 setOperationAction(ISD::FDIV, MVT::f32, Custom);
277 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
279 // SPU has [U|S]INT_TO_FP
280 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
284 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
285 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
286 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
289 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
290 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
291 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
292 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
294 // We cannot sextinreg(i1). Expand to shifts.
295 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
297 // Support label based line numbers.
298 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
299 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
301 // We want to legalize GlobalAddress and ConstantPool nodes into the
302 // appropriate instructions to materialize the address.
303 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
305 MVT VT = (MVT::SimpleValueType)sctype;
307 setOperationAction(ISD::GlobalAddress, VT, Custom);
308 setOperationAction(ISD::ConstantPool, VT, Custom);
309 setOperationAction(ISD::JumpTable, VT, Custom);
312 // RET must be custom lowered, to meet ABI requirements
313 setOperationAction(ISD::RET, MVT::Other, Custom);
315 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
316 setOperationAction(ISD::VASTART , MVT::Other, Custom);
318 // Use the default implementation.
319 setOperationAction(ISD::VAARG , MVT::Other, Expand);
320 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
321 setOperationAction(ISD::VAEND , MVT::Other, Expand);
322 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
323 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
324 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
325 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
327 // Cell SPU has instructions for converting between i64 and fp.
328 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
329 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
331 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
332 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
334 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
335 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
337 // First set operation action for all vector types to expand. Then we
338 // will selectively turn on ones that can be effectively codegen'd.
339 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
341 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
342 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
344 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
346 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
347 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
348 MVT VT = (MVT::SimpleValueType)i;
350 // add/sub are legal for all supported vector VT's.
351 setOperationAction(ISD::ADD , VT, Legal);
352 setOperationAction(ISD::SUB , VT, Legal);
353 // mul has to be custom lowered.
354 setOperationAction(ISD::MUL , VT, Custom);
356 setOperationAction(ISD::AND , VT, Legal);
357 setOperationAction(ISD::OR , VT, Legal);
358 setOperationAction(ISD::XOR , VT, Legal);
359 setOperationAction(ISD::LOAD , VT, Legal);
360 setOperationAction(ISD::SELECT, VT, Legal);
361 setOperationAction(ISD::STORE, VT, Legal);
363 // These operations need to be expanded:
364 setOperationAction(ISD::SDIV, VT, Expand);
365 setOperationAction(ISD::SREM, VT, Expand);
366 setOperationAction(ISD::UDIV, VT, Expand);
367 setOperationAction(ISD::UREM, VT, Expand);
368 setOperationAction(ISD::FDIV, VT, Custom);
370 // Custom lower build_vector, constant pool spills, insert and
371 // extract vector elements:
372 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
373 setOperationAction(ISD::ConstantPool, VT, Custom);
374 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
375 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
376 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
377 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
380 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
381 setOperationAction(ISD::AND, MVT::v16i8, Custom);
382 setOperationAction(ISD::OR, MVT::v16i8, Custom);
383 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
384 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
386 setShiftAmountType(MVT::i32);
387 setSetCCResultContents(ZeroOrOneSetCCResult);
389 setStackPointerRegisterToSaveRestore(SPU::R1);
391 // We have target-specific dag combine patterns for the following nodes:
392 setTargetDAGCombine(ISD::ADD);
393 setTargetDAGCombine(ISD::ZERO_EXTEND);
394 setTargetDAGCombine(ISD::SIGN_EXTEND);
395 setTargetDAGCombine(ISD::ANY_EXTEND);
397 computeRegisterProperties();
401 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
403 if (node_names.empty()) {
404 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
405 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
406 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
407 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
408 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
409 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
410 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
411 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
412 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
413 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
414 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
415 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
416 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
417 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
418 = "SPUISD::EXTRACT_ELT0_CHAINED";
419 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
420 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
421 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
422 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
423 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
424 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
425 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
426 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
427 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
428 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
429 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
430 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
431 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
432 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
433 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
434 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
435 "SPUISD::ROTQUAD_RZ_BYTES";
436 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
437 "SPUISD::ROTQUAD_RZ_BITS";
438 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
439 "SPUISD::ROTBYTES_RIGHT_S";
440 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
441 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
442 "SPUISD::ROTBYTES_LEFT_CHAINED";
443 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
444 "SPUISD::ROTBYTES_LEFT_BITS";
445 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
446 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
447 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
448 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
449 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
450 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
451 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
452 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
453 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
456 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
458 return ((i != node_names.end()) ? i->second : 0);
461 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
462 MVT VT = Op.getValueType();
463 return (VT.isInteger() ? VT : MVT(MVT::i32));
466 //===----------------------------------------------------------------------===//
467 // Calling convention code:
468 //===----------------------------------------------------------------------===//
470 #include "SPUGenCallingConv.inc"
472 //===----------------------------------------------------------------------===//
473 // LowerOperation implementation
474 //===----------------------------------------------------------------------===//
476 /// Aligned load common code for CellSPU
478 \param[in] Op The SelectionDAG load or store operand
479 \param[in] DAG The selection DAG
480 \param[in] ST CellSPU subtarget information structure
481 \param[in,out] alignment Caller initializes this to the load or store node's
482 value from getAlignment(), may be updated while generating the aligned load
483 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
484 offset (divisible by 16, modulo 16 == 0)
485 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
486 offset of the preferred slot (modulo 16 != 0)
487 \param[in,out] VT Caller initializes this value type to the the load or store
488 node's loaded or stored value type; may be updated if an i1-extended load or
490 \param[out] was16aligned true if the base pointer had 16-byte alignment,
491 otherwise false. Can help to determine if the chunk needs to be rotated.
493 Both load and store lowering load a block of data aligned on a 16-byte
494 boundary. This is the common aligned load code shared between both.
497 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
499 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
500 MVT &VT, bool &was16aligned)
502 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
503 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
504 SDValue basePtr = LSN->getBasePtr();
505 SDValue chain = LSN->getChain();
507 if (basePtr.getOpcode() == ISD::ADD) {
508 SDValue Op1 = basePtr.getNode()->getOperand(1);
510 if (Op1.getOpcode() == ISD::Constant
511 || Op1.getOpcode() == ISD::TargetConstant) {
512 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
514 alignOffs = (int) CN->getZExtValue();
515 prefSlotOffs = (int) (alignOffs & 0xf);
517 // Adjust the rotation amount to ensure that the final result ends up in
518 // the preferred slot:
519 prefSlotOffs -= vtm->prefslot_byte;
520 basePtr = basePtr.getOperand(0);
522 // Loading from memory, can we adjust alignment?
523 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
524 SDValue APtr = basePtr.getOperand(0);
525 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
526 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
527 alignment = GSDN->getGlobal()->getAlignment();
532 prefSlotOffs = -vtm->prefslot_byte;
534 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
535 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
536 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
537 prefSlotOffs = (int) (alignOffs & 0xf);
538 prefSlotOffs -= vtm->prefslot_byte;
539 basePtr = DAG.getRegister(SPU::R1, VT);
542 prefSlotOffs = -vtm->prefslot_byte;
545 if (alignment == 16) {
546 // Realign the base pointer as a D-Form address:
547 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
548 basePtr = DAG.getNode(ISD::ADD, PtrVT,
550 DAG.getConstant((alignOffs & ~0xf), PtrVT));
553 // Emit the vector load:
555 return DAG.getLoad(MVT::v16i8, chain, basePtr,
556 LSN->getSrcValue(), LSN->getSrcValueOffset(),
557 LSN->isVolatile(), 16);
560 // Unaligned load or we're using the "large memory" model, which means that
561 // we have to be very pessimistic:
562 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
563 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
564 DAG.getConstant(0, PtrVT));
568 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
569 DAG.getConstant((alignOffs & ~0xf), PtrVT));
570 was16aligned = false;
571 return DAG.getLoad(MVT::v16i8, chain, basePtr,
572 LSN->getSrcValue(), LSN->getSrcValueOffset(),
573 LSN->isVolatile(), 16);
576 /// Custom lower loads for CellSPU
578 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
579 within a 16-byte block, we have to rotate to extract the requested element.
582 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
583 LoadSDNode *LN = cast<LoadSDNode>(Op);
584 SDValue the_chain = LN->getChain();
585 MVT VT = LN->getMemoryVT();
586 MVT OpVT = Op.getNode()->getValueType(0);
587 ISD::LoadExtType ExtType = LN->getExtensionType();
588 unsigned alignment = LN->getAlignment();
591 switch (LN->getAddressingMode()) {
592 case ISD::UNINDEXED: {
596 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
598 if (result.getNode() == 0)
601 the_chain = result.getValue(1);
602 // Rotate the chunk if necessary
605 if (rotamt != 0 || !was16aligned) {
606 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
611 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
613 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
614 LoadSDNode *LN1 = cast<LoadSDNode>(result);
615 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
616 DAG.getConstant(rotamt, PtrVT));
619 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
620 the_chain = result.getValue(1);
623 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
625 MVT vecVT = MVT::v16i8;
627 // Convert the loaded v16i8 vector to the appropriate vector type
628 // specified by the operand:
631 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
633 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
636 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
637 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
638 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
639 the_chain = result.getValue(1);
641 // Handle the sign and zero-extending loads for i1 and i8:
644 if (ExtType == ISD::SEXTLOAD) {
645 NewOpC = (OpVT == MVT::i1
646 ? SPUISD::EXTRACT_I1_SEXT
647 : SPUISD::EXTRACT_I8_SEXT);
649 assert(ExtType == ISD::ZEXTLOAD);
650 NewOpC = (OpVT == MVT::i1
651 ? SPUISD::EXTRACT_I1_ZEXT
652 : SPUISD::EXTRACT_I8_ZEXT);
655 result = DAG.getNode(NewOpC, OpVT, result);
658 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
659 SDValue retops[2] = {
664 result = DAG.getNode(SPUISD::LDRESULT, retvts,
665 retops, sizeof(retops) / sizeof(retops[0]));
672 case ISD::LAST_INDEXED_MODE:
673 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
675 cerr << (unsigned) LN->getAddressingMode() << "\n";
683 /// Custom lower stores for CellSPU
685 All CellSPU stores are aligned to 16-byte boundaries, so for elements
686 within a 16-byte block, we have to generate a shuffle to insert the
687 requested element into its place, then store the resulting block.
690 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
691 StoreSDNode *SN = cast<StoreSDNode>(Op);
692 SDValue Value = SN->getValue();
693 MVT VT = Value.getValueType();
694 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
695 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
696 unsigned alignment = SN->getAlignment();
698 switch (SN->getAddressingMode()) {
699 case ISD::UNINDEXED: {
700 int chunk_offset, slot_offset;
703 // The vector type we really want to load from the 16-byte chunk, except
704 // in the case of MVT::i1, which has to be v16i8.
705 MVT vecVT, stVecVT = MVT::v16i8;
708 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
709 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
711 SDValue alignLoadVec =
712 AlignedLoad(Op, DAG, ST, SN, alignment,
713 chunk_offset, slot_offset, VT, was16aligned);
715 if (alignLoadVec.getNode() == 0)
718 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
719 SDValue basePtr = LN->getBasePtr();
720 SDValue the_chain = alignLoadVec.getValue(1);
721 SDValue theValue = SN->getValue();
725 && (theValue.getOpcode() == ISD::AssertZext
726 || theValue.getOpcode() == ISD::AssertSext)) {
727 // Drill down and get the value for zero- and sign-extended
729 theValue = theValue.getOperand(0);
734 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
735 SDValue insertEltPtr;
738 // If the base pointer is already a D-form address, then just create
739 // a new D-form address with a slot offset and the orignal base pointer.
740 // Otherwise generate a D-form address with the slot offset relative
741 // to the stack pointer, which is always aligned.
742 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
743 DEBUG(basePtr.getNode()->dump(&DAG));
746 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
747 (basePtr.getOpcode() == ISD::ADD
748 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
749 insertEltPtr = basePtr;
751 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
754 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
755 result = DAG.getNode(SPUISD::SHUFB, vecVT,
756 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
758 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
760 result = DAG.getStore(the_chain, result, basePtr,
761 LN->getSrcValue(), LN->getSrcValueOffset(),
762 LN->isVolatile(), LN->getAlignment());
771 case ISD::LAST_INDEXED_MODE:
772 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
774 cerr << (unsigned) SN->getAddressingMode() << "\n";
782 /// Generate the address of a constant pool entry.
784 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
785 MVT PtrVT = Op.getValueType();
786 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
787 Constant *C = CP->getConstVal();
788 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
789 SDValue Zero = DAG.getConstant(0, PtrVT);
790 const TargetMachine &TM = DAG.getTarget();
792 if (TM.getRelocationModel() == Reloc::Static) {
793 if (!ST->usingLargeMem()) {
794 // Just return the SDValue with the constant pool address in it.
795 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
797 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
798 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
799 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
804 "LowerConstantPool: Relocation model other than static"
810 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
811 MVT PtrVT = Op.getValueType();
812 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
813 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
814 SDValue Zero = DAG.getConstant(0, PtrVT);
815 const TargetMachine &TM = DAG.getTarget();
817 if (TM.getRelocationModel() == Reloc::Static) {
818 if (!ST->usingLargeMem()) {
819 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
821 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
822 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
823 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
828 "LowerJumpTable: Relocation model other than static not supported.");
833 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
834 MVT PtrVT = Op.getValueType();
835 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
836 GlobalValue *GV = GSDN->getGlobal();
837 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
838 const TargetMachine &TM = DAG.getTarget();
839 SDValue Zero = DAG.getConstant(0, PtrVT);
841 if (TM.getRelocationModel() == Reloc::Static) {
842 if (!ST->usingLargeMem()) {
843 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
845 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
846 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
847 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
850 cerr << "LowerGlobalAddress: Relocation model other than static not "
859 //! Custom lower i64 integer constants
861 This code inserts all of the necessary juggling that needs to occur to load
862 a 64-bit constant into a register.
865 LowerConstant(SDValue Op, SelectionDAG &DAG) {
866 MVT VT = Op.getValueType();
867 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
869 if (VT == MVT::i64) {
870 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
871 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
872 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
874 cerr << "LowerConstant: unhandled constant type "
884 //! Custom lower double precision floating point constants
886 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
887 MVT VT = Op.getValueType();
888 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
891 "LowerConstantFP: Node is not ConstantFPSDNode");
893 if (VT == MVT::f64) {
894 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
895 return DAG.getNode(ISD::BIT_CONVERT, VT,
896 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
902 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
904 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
906 SDValue Cond = Op.getOperand(1);
907 MVT CondVT = Cond.getValueType();
910 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
911 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
912 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
914 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
917 return SDValue(); // Unchanged
921 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
923 MachineFunction &MF = DAG.getMachineFunction();
924 MachineFrameInfo *MFI = MF.getFrameInfo();
925 MachineRegisterInfo &RegInfo = MF.getRegInfo();
926 SmallVector<SDValue, 48> ArgValues;
927 SDValue Root = Op.getOperand(0);
928 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
930 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
931 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
933 unsigned ArgOffset = SPUFrameInfo::minStackSize();
934 unsigned ArgRegIdx = 0;
935 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
937 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
939 // Add DAG nodes to load the arguments or copy them out of registers.
940 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
941 ArgNo != e; ++ArgNo) {
942 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
943 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
946 if (ArgRegIdx < NumArgRegs) {
947 const TargetRegisterClass *ArgRegClass;
949 switch (ObjectVT.getSimpleVT()) {
951 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
952 << ObjectVT.getMVTString()
957 ArgRegClass = &SPU::R8CRegClass;
960 ArgRegClass = &SPU::R16CRegClass;
963 ArgRegClass = &SPU::R32CRegClass;
966 ArgRegClass = &SPU::R64CRegClass;
969 ArgRegClass = &SPU::R32FPRegClass;
972 ArgRegClass = &SPU::R64FPRegClass;
980 ArgRegClass = &SPU::VECREGRegClass;
984 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
985 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
986 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
989 // We need to load the argument to a virtual register if we determined
990 // above that we ran out of physical registers of the appropriate type
991 // or we're forced to do vararg
992 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
993 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
994 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
995 ArgOffset += StackSlotSize;
998 ArgValues.push_back(ArgVal);
1000 Root = ArgVal.getOperand(0);
1005 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1006 // We will spill (79-3)+1 registers to the stack
1007 SmallVector<SDValue, 79-3+1> MemOps;
1009 // Create the frame slot
1011 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1012 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1013 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1014 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1015 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1016 Root = Store.getOperand(0);
1017 MemOps.push_back(Store);
1019 // Increment address by stack slot size for the next stored argument
1020 ArgOffset += StackSlotSize;
1022 if (!MemOps.empty())
1023 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1026 ArgValues.push_back(Root);
1028 // Return the new list of results.
1029 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1033 /// isLSAAddress - Return the immediate to use if the specified
1034 /// value is representable as a LSA address.
1035 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1036 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1039 int Addr = C->getZExtValue();
1040 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1041 (Addr << 14 >> 14) != Addr)
1042 return 0; // Top 14 bits have to be sext of immediate.
1044 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1049 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1050 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1051 SDValue Chain = TheCall->getChain();
1052 SDValue Callee = TheCall->getCallee();
1053 unsigned NumOps = TheCall->getNumArgs();
1054 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1055 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1056 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1058 // Handy pointer type
1059 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1061 // Accumulate how many bytes are to be pushed on the stack, including the
1062 // linkage area, and parameter passing area. According to the SPU ABI,
1063 // we minimally need space for [LR] and [SP]
1064 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1066 // Set up a copy of the stack pointer for use loading and storing any
1067 // arguments that may not fit in the registers available for argument
1069 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1071 // Figure out which arguments are going to go in registers, and which in
1073 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1074 unsigned ArgRegIdx = 0;
1076 // Keep track of registers passing arguments
1077 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1078 // And the arguments passed on the stack
1079 SmallVector<SDValue, 8> MemOpChains;
1081 for (unsigned i = 0; i != NumOps; ++i) {
1082 SDValue Arg = TheCall->getArg(i);
1084 // PtrOff will be used to store the current argument to the stack if a
1085 // register cannot be found for it.
1086 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1087 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1089 switch (Arg.getValueType().getSimpleVT()) {
1090 default: assert(0 && "Unexpected ValueType for argument!");
1094 if (ArgRegIdx != NumArgRegs) {
1095 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1097 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1098 ArgOffset += StackSlotSize;
1103 if (ArgRegIdx != NumArgRegs) {
1104 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1106 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1107 ArgOffset += StackSlotSize;
1114 if (ArgRegIdx != NumArgRegs) {
1115 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1117 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1118 ArgOffset += StackSlotSize;
1124 // Update number of stack bytes actually used, insert a call sequence start
1125 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1126 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1129 if (!MemOpChains.empty()) {
1130 // Adjust the stack pointer for the stack arguments.
1131 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1132 &MemOpChains[0], MemOpChains.size());
1135 // Build a sequence of copy-to-reg nodes chained together with token chain
1136 // and flag operands which copy the outgoing args into the appropriate regs.
1138 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1139 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1141 InFlag = Chain.getValue(1);
1144 SmallVector<SDValue, 8> Ops;
1145 unsigned CallOpc = SPUISD::CALL;
1147 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1148 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1149 // node so that legalize doesn't hack it.
1150 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1151 GlobalValue *GV = G->getGlobal();
1152 MVT CalleeVT = Callee.getValueType();
1153 SDValue Zero = DAG.getConstant(0, PtrVT);
1154 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1156 if (!ST->usingLargeMem()) {
1157 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1158 // style calls, otherwise, external symbols are BRASL calls. This assumes
1159 // that declared/defined symbols are in the same compilation unit and can
1160 // be reached through PC-relative jumps.
1163 // This may be an unsafe assumption for JIT and really large compilation
1165 if (GV->isDeclaration()) {
1166 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1168 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1171 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1173 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1175 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1176 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1177 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1178 // If this is an absolute destination address that appears to be a legal
1179 // local store address, use the munged value.
1180 Callee = SDValue(Dest, 0);
1183 Ops.push_back(Chain);
1184 Ops.push_back(Callee);
1186 // Add argument registers to the end of the list so that they are known live
1188 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1189 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1190 RegsToPass[i].second.getValueType()));
1192 if (InFlag.getNode())
1193 Ops.push_back(InFlag);
1194 // Returns a chain and a flag for retval copy to use.
1195 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1196 &Ops[0], Ops.size());
1197 InFlag = Chain.getValue(1);
1199 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1200 DAG.getIntPtrConstant(0, true), InFlag);
1201 if (TheCall->getValueType(0) != MVT::Other)
1202 InFlag = Chain.getValue(1);
1204 SDValue ResultVals[3];
1205 unsigned NumResults = 0;
1207 // If the call has results, copy the values out of the ret val registers.
1208 switch (TheCall->getValueType(0).getSimpleVT()) {
1209 default: assert(0 && "Unexpected ret value!");
1210 case MVT::Other: break;
1212 if (TheCall->getValueType(1) == MVT::i32) {
1213 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1214 ResultVals[0] = Chain.getValue(0);
1215 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1216 Chain.getValue(2)).getValue(1);
1217 ResultVals[1] = Chain.getValue(0);
1220 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1221 ResultVals[0] = Chain.getValue(0);
1226 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1227 ResultVals[0] = Chain.getValue(0);
1232 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1233 InFlag).getValue(1);
1234 ResultVals[0] = Chain.getValue(0);
1242 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1243 InFlag).getValue(1);
1244 ResultVals[0] = Chain.getValue(0);
1249 // If the function returns void, just return the chain.
1250 if (NumResults == 0)
1253 // Otherwise, merge everything together with a MERGE_VALUES node.
1254 ResultVals[NumResults++] = Chain;
1255 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1256 return Res.getValue(Op.getResNo());
1260 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1261 SmallVector<CCValAssign, 16> RVLocs;
1262 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1263 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1264 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1265 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1267 // If this is the first return lowered for this function, add the regs to the
1268 // liveout set for the function.
1269 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1270 for (unsigned i = 0; i != RVLocs.size(); ++i)
1271 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1274 SDValue Chain = Op.getOperand(0);
1277 // Copy the result values into the output registers.
1278 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1279 CCValAssign &VA = RVLocs[i];
1280 assert(VA.isRegLoc() && "Can only return in registers!");
1281 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1282 Flag = Chain.getValue(1);
1286 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1288 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1292 //===----------------------------------------------------------------------===//
1293 // Vector related lowering:
1294 //===----------------------------------------------------------------------===//
1296 static ConstantSDNode *
1297 getVecImm(SDNode *N) {
1298 SDValue OpVal(0, 0);
1300 // Check to see if this buildvec has a single non-undef value in its elements.
1301 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1302 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1303 if (OpVal.getNode() == 0)
1304 OpVal = N->getOperand(i);
1305 else if (OpVal != N->getOperand(i))
1309 if (OpVal.getNode() != 0) {
1310 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1315 return 0; // All UNDEF: use implicit def.; not Constant node
1318 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1319 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1321 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1323 if (ConstantSDNode *CN = getVecImm(N)) {
1324 uint64_t Value = CN->getZExtValue();
1325 if (ValueType == MVT::i64) {
1326 uint64_t UValue = CN->getZExtValue();
1327 uint32_t upper = uint32_t(UValue >> 32);
1328 uint32_t lower = uint32_t(UValue);
1331 Value = Value >> 32;
1333 if (Value <= 0x3ffff)
1334 return DAG.getTargetConstant(Value, ValueType);
1340 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1341 /// and the value fits into a signed 16-bit constant, and if so, return the
1343 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1345 if (ConstantSDNode *CN = getVecImm(N)) {
1346 int64_t Value = CN->getSExtValue();
1347 if (ValueType == MVT::i64) {
1348 uint64_t UValue = CN->getZExtValue();
1349 uint32_t upper = uint32_t(UValue >> 32);
1350 uint32_t lower = uint32_t(UValue);
1353 Value = Value >> 32;
1355 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1356 return DAG.getTargetConstant(Value, ValueType);
1363 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1364 /// and the value fits into a signed 10-bit constant, and if so, return the
1366 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1368 if (ConstantSDNode *CN = getVecImm(N)) {
1369 int64_t Value = CN->getSExtValue();
1370 if (ValueType == MVT::i64) {
1371 uint64_t UValue = CN->getZExtValue();
1372 uint32_t upper = uint32_t(UValue >> 32);
1373 uint32_t lower = uint32_t(UValue);
1376 Value = Value >> 32;
1378 if (isS10Constant(Value))
1379 return DAG.getTargetConstant(Value, ValueType);
1385 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1386 /// and the value fits into a signed 8-bit constant, and if so, return the
1389 /// @note: The incoming vector is v16i8 because that's the only way we can load
1390 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1392 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1394 if (ConstantSDNode *CN = getVecImm(N)) {
1395 int Value = (int) CN->getZExtValue();
1396 if (ValueType == MVT::i16
1397 && Value <= 0xffff /* truncated from uint64_t */
1398 && ((short) Value >> 8) == ((short) Value & 0xff))
1399 return DAG.getTargetConstant(Value & 0xff, ValueType);
1400 else if (ValueType == MVT::i8
1401 && (Value & 0xff) == Value)
1402 return DAG.getTargetConstant(Value, ValueType);
1408 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1409 /// and the value fits into a signed 16-bit constant, and if so, return the
1411 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1413 if (ConstantSDNode *CN = getVecImm(N)) {
1414 uint64_t Value = CN->getZExtValue();
1415 if ((ValueType == MVT::i32
1416 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1417 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1418 return DAG.getTargetConstant(Value >> 16, ValueType);
1424 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1425 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1426 if (ConstantSDNode *CN = getVecImm(N)) {
1427 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1433 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1434 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1435 if (ConstantSDNode *CN = getVecImm(N)) {
1436 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1442 // If this is a vector of constants or undefs, get the bits. A bit in
1443 // UndefBits is set if the corresponding element of the vector is an
1444 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1445 // zero. Return true if this is not an array of constants, false if it is.
1447 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1448 uint64_t UndefBits[2]) {
1449 // Start with zero'd results.
1450 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1452 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1453 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1454 SDValue OpVal = BV->getOperand(i);
1456 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1457 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1459 uint64_t EltBits = 0;
1460 if (OpVal.getOpcode() == ISD::UNDEF) {
1461 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1462 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1464 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1465 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1466 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1467 const APFloat &apf = CN->getValueAPF();
1468 EltBits = (CN->getValueType(0) == MVT::f32
1469 ? FloatToBits(apf.convertToFloat())
1470 : DoubleToBits(apf.convertToDouble()));
1472 // Nonconstant element.
1476 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1479 //printf("%llx %llx %llx %llx\n",
1480 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1484 /// If this is a splat (repetition) of a value across the whole vector, return
1485 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1486 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1487 /// SplatSize = 1 byte.
1488 static bool isConstantSplat(const uint64_t Bits128[2],
1489 const uint64_t Undef128[2],
1491 uint64_t &SplatBits, uint64_t &SplatUndef,
1493 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1494 // the same as the lower 64-bits, ignoring undefs.
1495 uint64_t Bits64 = Bits128[0] | Bits128[1];
1496 uint64_t Undef64 = Undef128[0] & Undef128[1];
1497 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1498 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1499 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1500 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1502 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1503 if (MinSplatBits < 64) {
1505 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1507 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1508 if (MinSplatBits < 32) {
1510 // If the top 16-bits are different than the lower 16-bits, ignoring
1511 // undefs, we have an i32 splat.
1512 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1513 if (MinSplatBits < 16) {
1514 // If the top 8-bits are different than the lower 8-bits, ignoring
1515 // undefs, we have an i16 splat.
1516 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1517 == ((Bits16 >> 8) & ~Undef16)) {
1518 // Otherwise, we have an 8-bit splat.
1519 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1520 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1526 SplatUndef = Undef16;
1533 SplatUndef = Undef32;
1539 SplatBits = Bits128[0];
1540 SplatUndef = Undef128[0];
1546 return false; // Can't be a splat if two pieces don't match.
1549 // If this is a case we can't handle, return null and let the default
1550 // expansion code take care of it. If we CAN select this case, and if it
1551 // selects to a single instruction, return Op. Otherwise, if we can codegen
1552 // this case more efficiently than a constant pool load, lower it to the
1553 // sequence of ops that should be used.
1554 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1555 MVT VT = Op.getValueType();
1556 // If this is a vector of constants or undefs, get the bits. A bit in
1557 // UndefBits is set if the corresponding element of the vector is an
1558 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1560 uint64_t VectorBits[2];
1561 uint64_t UndefBits[2];
1562 uint64_t SplatBits, SplatUndef;
1564 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1565 || !isConstantSplat(VectorBits, UndefBits,
1566 VT.getVectorElementType().getSizeInBits(),
1567 SplatBits, SplatUndef, SplatSize))
1568 return SDValue(); // Not a constant vector, not a splat.
1570 switch (VT.getSimpleVT()) {
1573 uint32_t Value32 = SplatBits;
1574 assert(SplatSize == 4
1575 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1576 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1577 SDValue T = DAG.getConstant(Value32, MVT::i32);
1578 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1579 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1583 uint64_t f64val = SplatBits;
1584 assert(SplatSize == 8
1585 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1586 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1587 SDValue T = DAG.getConstant(f64val, MVT::i64);
1588 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1589 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1593 // 8-bit constants have to be expanded to 16-bits
1594 unsigned short Value16 = SplatBits | (SplatBits << 8);
1596 for (int i = 0; i < 8; ++i)
1597 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1598 return DAG.getNode(ISD::BIT_CONVERT, VT,
1599 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1602 unsigned short Value16;
1604 Value16 = (unsigned short) (SplatBits & 0xffff);
1606 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1607 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1609 for (int i = 0; i < 8; ++i) Ops[i] = T;
1610 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1613 unsigned int Value = SplatBits;
1614 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1615 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1618 uint64_t val = SplatBits;
1619 uint32_t upper = uint32_t(val >> 32);
1620 uint32_t lower = uint32_t(val);
1622 if (upper == lower) {
1623 // Magic constant that can be matched by IL, ILA, et. al.
1624 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1625 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1629 SmallVector<SDValue, 16> ShufBytes;
1631 bool upper_special, lower_special;
1633 // NOTE: This code creates common-case shuffle masks that can be easily
1634 // detected as common expressions. It is not attempting to create highly
1635 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1637 // Detect if the upper or lower half is a special shuffle mask pattern:
1638 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1639 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1641 // Create lower vector if not a special pattern
1642 if (!lower_special) {
1643 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1644 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1645 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1646 LO32C, LO32C, LO32C, LO32C));
1649 // Create upper vector if not a special pattern
1650 if (!upper_special) {
1651 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1652 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1653 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1654 HI32C, HI32C, HI32C, HI32C));
1657 // If either upper or lower are special, then the two input operands are
1658 // the same (basically, one of them is a "don't care")
1663 if (lower_special && upper_special) {
1664 // Unhappy situation... both upper and lower are special, so punt with
1665 // a target constant:
1666 SDValue Zero = DAG.getConstant(0, MVT::i32);
1667 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1671 for (int i = 0; i < 4; ++i) {
1673 for (int j = 0; j < 4; ++j) {
1675 bool process_upper, process_lower;
1677 process_upper = (upper_special && (i & 1) == 0);
1678 process_lower = (lower_special && (i & 1) == 1);
1680 if (process_upper || process_lower) {
1681 if ((process_upper && upper == 0)
1682 || (process_lower && lower == 0))
1684 else if ((process_upper && upper == 0xffffffff)
1685 || (process_lower && lower == 0xffffffff))
1687 else if ((process_upper && upper == 0x80000000)
1688 || (process_lower && lower == 0x80000000))
1689 val |= (j == 0 ? 0xe0 : 0x80);
1691 val |= i * 4 + j + ((i & 1) * 16);
1694 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1697 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1698 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1699 &ShufBytes[0], ShufBytes.size()));
1707 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1708 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1709 /// permutation vector, V3, is monotonically increasing with one "exception"
1710 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1711 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1712 /// In either case, the net result is going to eventually invoke SHUFB to
1713 /// permute/shuffle the bytes from V1 and V2.
1715 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1716 /// control word for byte/halfword/word insertion. This takes care of a single
1717 /// element move from V2 into V1.
1719 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1720 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1721 SDValue V1 = Op.getOperand(0);
1722 SDValue V2 = Op.getOperand(1);
1723 SDValue PermMask = Op.getOperand(2);
1725 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1727 // If we have a single element being moved from V1 to V2, this can be handled
1728 // using the C*[DX] compute mask instructions, but the vector elements have
1729 // to be monotonically increasing with one exception element.
1730 MVT EltVT = V1.getValueType().getVectorElementType();
1731 unsigned EltsFromV2 = 0;
1733 unsigned V2EltIdx0 = 0;
1734 unsigned CurrElt = 0;
1735 bool monotonic = true;
1736 if (EltVT == MVT::i8)
1738 else if (EltVT == MVT::i16)
1740 else if (EltVT == MVT::i32)
1743 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1745 for (unsigned i = 0, e = PermMask.getNumOperands();
1746 EltsFromV2 <= 1 && monotonic && i != e;
1749 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1752 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1754 if (SrcElt >= V2EltIdx0) {
1756 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1757 } else if (CurrElt != SrcElt) {
1764 if (EltsFromV2 == 1 && monotonic) {
1765 // Compute mask and shuffle
1766 MachineFunction &MF = DAG.getMachineFunction();
1767 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1768 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1769 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1770 // Initialize temporary register to 0
1771 SDValue InitTempReg =
1772 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1773 // Copy register's contents as index in INSERT_MASK:
1774 SDValue ShufMaskOp =
1775 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1776 DAG.getTargetConstant(V2Elt, MVT::i32),
1777 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1778 // Use shuffle mask in SHUFB synthetic instruction:
1779 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1781 // Convert the SHUFFLE_VECTOR mask's input element units to the
1783 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1785 SmallVector<SDValue, 16> ResultMask;
1786 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1788 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1791 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1793 for (unsigned j = 0; j < BytesPerElement; ++j) {
1794 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1799 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1800 &ResultMask[0], ResultMask.size());
1801 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1805 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1806 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1808 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1809 // For a constant, build the appropriate constant vector, which will
1810 // eventually simplify to a vector register load.
1812 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1813 SmallVector<SDValue, 16> ConstVecValues;
1817 // Create a constant vector:
1818 switch (Op.getValueType().getSimpleVT()) {
1819 default: assert(0 && "Unexpected constant value type in "
1820 "LowerSCALAR_TO_VECTOR");
1821 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1822 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1823 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1824 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1825 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1826 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1829 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1830 for (size_t j = 0; j < n_copies; ++j)
1831 ConstVecValues.push_back(CValue);
1833 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1834 &ConstVecValues[0], ConstVecValues.size());
1836 // Otherwise, copy the value from one register to another:
1837 switch (Op0.getValueType().getSimpleVT()) {
1838 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1845 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1852 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1853 switch (Op.getValueType().getSimpleVT()) {
1855 cerr << "CellSPU: Unknown vector multiplication, got "
1856 << Op.getValueType().getMVTString()
1862 SDValue rA = Op.getOperand(0);
1863 SDValue rB = Op.getOperand(1);
1864 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1865 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1866 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1867 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1869 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1873 // Multiply two v8i16 vectors (pipeline friendly version):
1874 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1875 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1876 // c) Use SELB to select upper and lower halves from the intermediate results
1878 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1879 // dual-issue. This code does manage to do this, even if it's a little on
1882 MachineFunction &MF = DAG.getMachineFunction();
1883 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1884 SDValue Chain = Op.getOperand(0);
1885 SDValue rA = Op.getOperand(0);
1886 SDValue rB = Op.getOperand(1);
1887 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1888 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1891 DAG.getCopyToReg(Chain, FSMBIreg,
1892 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1893 DAG.getConstant(0xcccc, MVT::i16)));
1896 DAG.getCopyToReg(FSMBOp, HiProdReg,
1897 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1899 SDValue HHProd_v4i32 =
1900 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1901 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1903 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1904 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1905 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1906 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1908 DAG.getConstant(16, MVT::i16))),
1909 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1912 // This M00sE is N@stI! (apologies to Monty Python)
1914 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1915 // is to break it all apart, sign extend, and reassemble the various
1916 // intermediate products.
1918 SDValue rA = Op.getOperand(0);
1919 SDValue rB = Op.getOperand(1);
1920 SDValue c8 = DAG.getConstant(8, MVT::i32);
1921 SDValue c16 = DAG.getConstant(16, MVT::i32);
1924 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1925 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1926 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1928 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1930 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1933 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1934 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1936 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1937 DAG.getConstant(0x2222, MVT::i16));
1939 SDValue LoProdParts =
1940 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1941 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1942 LLProd, LHProd, FSMBmask));
1944 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1947 DAG.getNode(ISD::AND, MVT::v4i32,
1949 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1950 LoProdMask, LoProdMask,
1951 LoProdMask, LoProdMask));
1954 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1955 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1958 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1959 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1962 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1963 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1964 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1967 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1968 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1969 DAG.getNode(SPUISD::VEC_SRA,
1970 MVT::v4i32, rAH, c8)),
1971 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1972 DAG.getNode(SPUISD::VEC_SRA,
1973 MVT::v4i32, rBH, c8)));
1976 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1978 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1982 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
1984 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
1985 DAG.getNode(ISD::OR, MVT::v4i32,
1993 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
1994 MachineFunction &MF = DAG.getMachineFunction();
1995 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1997 SDValue A = Op.getOperand(0);
1998 SDValue B = Op.getOperand(1);
1999 MVT VT = Op.getValueType();
2001 unsigned VRegBR, VRegC;
2003 if (VT == MVT::f32) {
2004 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2005 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2007 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2008 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2010 // TODO: make sure we're feeding FPInterp the right arguments
2011 // Right now: fi B, frest(B)
2014 // (Floating Interpolate (FP Reciprocal Estimate B))
2016 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2017 DAG.getNode(SPUISD::FPInterp, VT, B,
2018 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2020 // Computes A * BRcpl and stores in a temporary register
2022 DAG.getCopyToReg(BRcpl, VRegC,
2023 DAG.getNode(ISD::FMUL, VT, A,
2024 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2025 // What's the Chain variable do? It's magic!
2026 // TODO: set Chain = Op(0).getEntryNode()
2028 return DAG.getNode(ISD::FADD, VT,
2029 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2030 DAG.getNode(ISD::FMUL, VT,
2031 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2032 DAG.getNode(ISD::FSUB, VT, A,
2033 DAG.getNode(ISD::FMUL, VT, B,
2034 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2037 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2038 MVT VT = Op.getValueType();
2039 SDValue N = Op.getOperand(0);
2040 SDValue Elt = Op.getOperand(1);
2041 SDValue ShufMask[16];
2042 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2044 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2046 int EltNo = (int) C->getZExtValue();
2049 if (VT == MVT::i8 && EltNo >= 16)
2050 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2051 else if (VT == MVT::i16 && EltNo >= 8)
2052 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2053 else if (VT == MVT::i32 && EltNo >= 4)
2054 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2055 else if (VT == MVT::i64 && EltNo >= 2)
2056 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2058 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2059 // i32 and i64: Element 0 is the preferred slot
2060 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2063 // Need to generate shuffle mask and extract:
2064 int prefslot_begin = -1, prefslot_end = -1;
2065 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2067 switch (VT.getSimpleVT()) {
2069 assert(false && "Invalid value type!");
2071 prefslot_begin = prefslot_end = 3;
2075 prefslot_begin = 2; prefslot_end = 3;
2080 prefslot_begin = 0; prefslot_end = 3;
2085 prefslot_begin = 0; prefslot_end = 7;
2090 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2091 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2093 for (int i = 0; i < 16; ++i) {
2094 // zero fill uppper part of preferred slot, don't care about the
2096 unsigned int mask_val;
2098 if (i <= prefslot_end) {
2100 ((i < prefslot_begin)
2102 : elt_byte + (i - prefslot_begin));
2104 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2106 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2109 SDValue ShufMaskVec =
2110 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2112 sizeof(ShufMask) / sizeof(ShufMask[0]));
2114 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2115 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2116 N, N, ShufMaskVec));
2120 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2121 SDValue VecOp = Op.getOperand(0);
2122 SDValue ValOp = Op.getOperand(1);
2123 SDValue IdxOp = Op.getOperand(2);
2124 MVT VT = Op.getValueType();
2126 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2127 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2129 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2130 // Use $2 because it's always 16-byte aligned and it's available:
2131 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2134 DAG.getNode(SPUISD::SHUFB, VT,
2135 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2137 DAG.getNode(SPUISD::INSERT_MASK, VT,
2138 DAG.getNode(ISD::ADD, PtrVT,
2140 DAG.getConstant(CN->getZExtValue(),
2146 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2148 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2150 assert(Op.getValueType() == MVT::i8);
2153 assert(0 && "Unhandled i8 math operator");
2157 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2159 SDValue N1 = Op.getOperand(1);
2160 N0 = (N0.getOpcode() != ISD::Constant
2161 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2162 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2164 N1 = (N1.getOpcode() != ISD::Constant
2165 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2166 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2168 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2169 DAG.getNode(Opc, MVT::i16, N0, N1));
2173 SDValue N1 = Op.getOperand(1);
2175 N0 = (N0.getOpcode() != ISD::Constant
2176 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2177 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2179 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2182 N1 = (N1.getOpcode() != ISD::Constant
2183 ? DAG.getNode(N1Opc, MVT::i32, N1)
2184 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2187 DAG.getNode(ISD::OR, MVT::i16, N0,
2188 DAG.getNode(ISD::SHL, MVT::i16,
2189 N0, DAG.getConstant(8, MVT::i32)));
2190 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2191 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2195 SDValue N1 = Op.getOperand(1);
2197 N0 = (N0.getOpcode() != ISD::Constant
2198 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2199 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2201 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2204 N1 = (N1.getOpcode() != ISD::Constant
2205 ? DAG.getNode(N1Opc, MVT::i16, N1)
2206 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2208 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2209 DAG.getNode(Opc, MVT::i16, N0, N1));
2212 SDValue N1 = Op.getOperand(1);
2214 N0 = (N0.getOpcode() != ISD::Constant
2215 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2216 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2218 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2221 N1 = (N1.getOpcode() != ISD::Constant
2222 ? DAG.getNode(N1Opc, MVT::i16, N1)
2223 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2225 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2226 DAG.getNode(Opc, MVT::i16, N0, N1));
2229 SDValue N1 = Op.getOperand(1);
2231 N0 = (N0.getOpcode() != ISD::Constant
2232 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2233 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2235 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2236 N1 = (N1.getOpcode() != ISD::Constant
2237 ? DAG.getNode(N1Opc, MVT::i16, N1)
2238 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2240 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2241 DAG.getNode(Opc, MVT::i16, N0, N1));
2249 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2251 MVT VT = Op.getValueType();
2252 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2254 SDValue Op0 = Op.getOperand(0);
2257 case ISD::ZERO_EXTEND:
2258 case ISD::SIGN_EXTEND:
2259 case ISD::ANY_EXTEND: {
2260 MVT Op0VT = Op0.getValueType();
2261 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2263 assert(Op0VT == MVT::i32
2264 && "CellSPU: Zero/sign extending something other than i32");
2265 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2267 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2268 ? SPUISD::ROTBYTES_RIGHT_S
2269 : SPUISD::ROTQUAD_RZ_BYTES);
2270 SDValue PromoteScalar =
2271 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2273 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2274 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2275 DAG.getNode(NewOpc, Op0VecVT,
2277 DAG.getConstant(4, MVT::i32))));
2281 // Turn operands into vectors to satisfy type checking (shufb works on
2284 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2286 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2287 SmallVector<SDValue, 16> ShufBytes;
2289 // Create the shuffle mask for "rotating" the borrow up one register slot
2290 // once the borrow is generated.
2291 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2292 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2293 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2294 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2297 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2298 SDValue ShiftedCarry =
2299 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2301 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2302 &ShufBytes[0], ShufBytes.size()));
2304 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2305 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2306 Op0, Op1, ShiftedCarry));
2310 // Turn operands into vectors to satisfy type checking (shufb works on
2313 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2315 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2316 SmallVector<SDValue, 16> ShufBytes;
2318 // Create the shuffle mask for "rotating" the borrow up one register slot
2319 // once the borrow is generated.
2320 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2321 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2322 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2323 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2326 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2327 SDValue ShiftedBorrow =
2328 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2329 BorrowGen, BorrowGen,
2330 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2331 &ShufBytes[0], ShufBytes.size()));
2333 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2334 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2335 Op0, Op1, ShiftedBorrow));
2339 SDValue ShiftAmt = Op.getOperand(1);
2340 MVT ShiftAmtVT = ShiftAmt.getValueType();
2341 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2343 DAG.getNode(SPUISD::SELB, VecVT,
2345 DAG.getConstant(0, VecVT),
2346 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2347 DAG.getConstant(0xff00ULL, MVT::i16)));
2348 SDValue ShiftAmtBytes =
2349 DAG.getNode(ISD::SRL, ShiftAmtVT,
2351 DAG.getConstant(3, ShiftAmtVT));
2352 SDValue ShiftAmtBits =
2353 DAG.getNode(ISD::AND, ShiftAmtVT,
2355 DAG.getConstant(7, ShiftAmtVT));
2357 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2358 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2359 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2360 MaskLower, ShiftAmtBytes),
2365 MVT VT = Op.getValueType();
2366 SDValue ShiftAmt = Op.getOperand(1);
2367 MVT ShiftAmtVT = ShiftAmt.getValueType();
2368 SDValue ShiftAmtBytes =
2369 DAG.getNode(ISD::SRL, ShiftAmtVT,
2371 DAG.getConstant(3, ShiftAmtVT));
2372 SDValue ShiftAmtBits =
2373 DAG.getNode(ISD::AND, ShiftAmtVT,
2375 DAG.getConstant(7, ShiftAmtVT));
2377 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2378 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2379 Op0, ShiftAmtBytes),
2384 // Promote Op0 to vector
2386 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2387 SDValue ShiftAmt = Op.getOperand(1);
2388 MVT ShiftVT = ShiftAmt.getValueType();
2390 // Negate variable shift amounts
2391 if (!isa<ConstantSDNode>(ShiftAmt)) {
2392 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2393 DAG.getConstant(0, ShiftVT), ShiftAmt);
2396 SDValue UpperHalfSign =
2397 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2398 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2399 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2400 Op0, DAG.getConstant(31, MVT::i32))));
2401 SDValue UpperHalfSignMask =
2402 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2403 SDValue UpperLowerMask =
2404 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2405 DAG.getConstant(0xff00, MVT::i16));
2406 SDValue UpperLowerSelect =
2407 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2408 UpperHalfSignMask, Op0, UpperLowerMask);
2409 SDValue RotateLeftBytes =
2410 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2411 UpperLowerSelect, ShiftAmt);
2412 SDValue RotateLeftBits =
2413 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2414 RotateLeftBytes, ShiftAmt);
2416 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2424 //! Lower byte immediate operations for v16i8 vectors:
2426 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2429 MVT VT = Op.getValueType();
2431 ConstVec = Op.getOperand(0);
2432 Arg = Op.getOperand(1);
2433 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2434 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2435 ConstVec = ConstVec.getOperand(0);
2437 ConstVec = Op.getOperand(1);
2438 Arg = Op.getOperand(0);
2439 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2440 ConstVec = ConstVec.getOperand(0);
2445 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2446 uint64_t VectorBits[2];
2447 uint64_t UndefBits[2];
2448 uint64_t SplatBits, SplatUndef;
2451 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2452 && isConstantSplat(VectorBits, UndefBits,
2453 VT.getVectorElementType().getSizeInBits(),
2454 SplatBits, SplatUndef, SplatSize)) {
2456 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2457 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2459 // Turn the BUILD_VECTOR into a set of target constants:
2460 for (size_t i = 0; i < tcVecSize; ++i)
2463 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2464 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2467 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2468 // lowered. Return the operation, rather than a null SDValue.
2472 //! Lower i32 multiplication
2473 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2475 switch (VT.getSimpleVT()) {
2477 cerr << "CellSPU: Unknown LowerMUL value type, got "
2478 << Op.getValueType().getMVTString()
2484 SDValue rA = Op.getOperand(0);
2485 SDValue rB = Op.getOperand(1);
2487 return DAG.getNode(ISD::ADD, MVT::i32,
2488 DAG.getNode(ISD::ADD, MVT::i32,
2489 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2490 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2491 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2498 //! Custom lowering for CTPOP (count population)
2500 Custom lowering code that counts the number ones in the input
2501 operand. SPU has such an instruction, but it counts the number of
2502 ones per byte, which then have to be accumulated.
2504 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2505 MVT VT = Op.getValueType();
2506 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2508 switch (VT.getSimpleVT()) {
2510 assert(false && "Invalid value type!");
2512 SDValue N = Op.getOperand(0);
2513 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2515 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2516 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2518 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2522 MachineFunction &MF = DAG.getMachineFunction();
2523 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2525 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2527 SDValue N = Op.getOperand(0);
2528 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2529 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2530 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2532 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2533 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2535 // CNTB_result becomes the chain to which all of the virtual registers
2536 // CNTB_reg, SUM1_reg become associated:
2537 SDValue CNTB_result =
2538 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2540 SDValue CNTB_rescopy =
2541 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2543 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2545 return DAG.getNode(ISD::AND, MVT::i16,
2546 DAG.getNode(ISD::ADD, MVT::i16,
2547 DAG.getNode(ISD::SRL, MVT::i16,
2554 MachineFunction &MF = DAG.getMachineFunction();
2555 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2557 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2558 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2560 SDValue N = Op.getOperand(0);
2561 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2562 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2563 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2564 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2566 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2567 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2569 // CNTB_result becomes the chain to which all of the virtual registers
2570 // CNTB_reg, SUM1_reg become associated:
2571 SDValue CNTB_result =
2572 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2574 SDValue CNTB_rescopy =
2575 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2578 DAG.getNode(ISD::SRL, MVT::i32,
2579 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2582 DAG.getNode(ISD::ADD, MVT::i32,
2583 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2585 SDValue Sum1_rescopy =
2586 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2589 DAG.getNode(ISD::SRL, MVT::i32,
2590 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2593 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2594 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2596 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2606 /// LowerOperation - Provide custom lowering hooks for some operations.
2609 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2611 unsigned Opc = (unsigned) Op.getOpcode();
2612 MVT VT = Op.getValueType();
2616 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2617 cerr << "Op.getOpcode() = " << Opc << "\n";
2618 cerr << "*Op.getNode():\n";
2619 Op.getNode()->dump();
2625 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2627 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2628 case ISD::ConstantPool:
2629 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2630 case ISD::GlobalAddress:
2631 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2632 case ISD::JumpTable:
2633 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2635 return LowerConstant(Op, DAG);
2636 case ISD::ConstantFP:
2637 return LowerConstantFP(Op, DAG);
2639 return LowerBRCOND(Op, DAG);
2640 case ISD::FORMAL_ARGUMENTS:
2641 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2643 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2645 return LowerRET(Op, DAG, getTargetMachine());
2648 // i8, i64 math ops:
2649 case ISD::ZERO_EXTEND:
2650 case ISD::SIGN_EXTEND:
2651 case ISD::ANY_EXTEND:
2660 return LowerI8Math(Op, DAG, Opc);
2661 else if (VT == MVT::i64)
2662 return LowerI64Math(Op, DAG, Opc);
2666 // Vector-related lowering.
2667 case ISD::BUILD_VECTOR:
2668 return LowerBUILD_VECTOR(Op, DAG);
2669 case ISD::SCALAR_TO_VECTOR:
2670 return LowerSCALAR_TO_VECTOR(Op, DAG);
2671 case ISD::VECTOR_SHUFFLE:
2672 return LowerVECTOR_SHUFFLE(Op, DAG);
2673 case ISD::EXTRACT_VECTOR_ELT:
2674 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2675 case ISD::INSERT_VECTOR_ELT:
2676 return LowerINSERT_VECTOR_ELT(Op, DAG);
2678 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2682 return LowerByteImmed(Op, DAG);
2684 // Vector and i8 multiply:
2687 return LowerVectorMUL(Op, DAG);
2688 else if (VT == MVT::i8)
2689 return LowerI8Math(Op, DAG, Opc);
2691 return LowerMUL(Op, DAG, VT, Opc);
2694 if (VT == MVT::f32 || VT == MVT::v4f32)
2695 return LowerFDIVf32(Op, DAG);
2696 // else if (Op.getValueType() == MVT::f64)
2697 // return LowerFDIVf64(Op, DAG);
2699 assert(0 && "Calling FDIV on unsupported MVT");
2702 return LowerCTPOP(Op, DAG);
2708 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2711 unsigned Opc = (unsigned) N->getOpcode();
2712 MVT OpVT = N->getValueType(0);
2716 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2717 cerr << "Op.getOpcode() = " << Opc << "\n";
2718 cerr << "*Op.getNode():\n";
2726 /* Otherwise, return unchanged */
2730 //===----------------------------------------------------------------------===//
2731 // Target Optimization Hooks
2732 //===----------------------------------------------------------------------===//
2735 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2738 TargetMachine &TM = getTargetMachine();
2740 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2741 SelectionDAG &DAG = DCI.DAG;
2742 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2743 SDValue Result; // Initially, NULL result
2745 switch (N->getOpcode()) {
2748 SDValue Op1 = N->getOperand(1);
2750 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2751 SDValue Op01 = Op0.getOperand(1);
2752 if (Op01.getOpcode() == ISD::Constant
2753 || Op01.getOpcode() == ISD::TargetConstant) {
2754 // (add <const>, (SPUindirect <arg>, <const>)) ->
2755 // (SPUindirect <arg>, <const + const>)
2756 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2757 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2758 SDValue combinedConst =
2759 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2760 Op0.getValueType());
2762 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2763 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2764 DEBUG(cerr << "With: (SPUindirect <arg>, "
2765 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2766 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2767 Op0.getOperand(0), combinedConst);
2769 } else if (isa<ConstantSDNode>(Op0)
2770 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2771 SDValue Op11 = Op1.getOperand(1);
2772 if (Op11.getOpcode() == ISD::Constant
2773 || Op11.getOpcode() == ISD::TargetConstant) {
2774 // (add (SPUindirect <arg>, <const>), <const>) ->
2775 // (SPUindirect <arg>, <const + const>)
2776 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2777 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2778 SDValue combinedConst =
2779 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2780 Op0.getValueType());
2782 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2783 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2784 DEBUG(cerr << "With: (SPUindirect <arg>, "
2785 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2787 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2788 Op1.getOperand(0), combinedConst);
2793 case ISD::SIGN_EXTEND:
2794 case ISD::ZERO_EXTEND:
2795 case ISD::ANY_EXTEND: {
2796 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2797 N->getValueType(0) == Op0.getValueType()) {
2798 // (any_extend (SPUextract_elt0 <arg>)) ->
2799 // (SPUextract_elt0 <arg>)
2800 // Types must match, however...
2801 DEBUG(cerr << "Replace: ");
2802 DEBUG(N->dump(&DAG));
2803 DEBUG(cerr << "\nWith: ");
2804 DEBUG(Op0.getNode()->dump(&DAG));
2805 DEBUG(cerr << "\n");
2811 case SPUISD::IndirectAddr: {
2812 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2813 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2814 if (CN->getZExtValue() == 0) {
2815 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2816 // (SPUaform <addr>, 0)
2818 DEBUG(cerr << "Replace: ");
2819 DEBUG(N->dump(&DAG));
2820 DEBUG(cerr << "\nWith: ");
2821 DEBUG(Op0.getNode()->dump(&DAG));
2822 DEBUG(cerr << "\n");
2829 case SPUISD::SHLQUAD_L_BITS:
2830 case SPUISD::SHLQUAD_L_BYTES:
2831 case SPUISD::VEC_SHL:
2832 case SPUISD::VEC_SRL:
2833 case SPUISD::VEC_SRA:
2834 case SPUISD::ROTQUAD_RZ_BYTES:
2835 case SPUISD::ROTQUAD_RZ_BITS: {
2836 SDValue Op1 = N->getOperand(1);
2838 if (isa<ConstantSDNode>(Op1)) {
2839 // Kill degenerate vector shifts:
2840 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2842 if (CN->getZExtValue() == 0) {
2848 case SPUISD::PROMOTE_SCALAR: {
2849 switch (Op0.getOpcode()) {
2852 case ISD::ANY_EXTEND:
2853 case ISD::ZERO_EXTEND:
2854 case ISD::SIGN_EXTEND: {
2855 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2857 // but only if the SPUpromote_scalar and <arg> types match.
2858 SDValue Op00 = Op0.getOperand(0);
2859 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2860 SDValue Op000 = Op00.getOperand(0);
2861 if (Op000.getValueType() == N->getValueType(0)) {
2867 case SPUISD::EXTRACT_ELT0: {
2868 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2870 Result = Op0.getOperand(0);
2877 // Otherwise, return unchanged.
2879 if (Result.getNode()) {
2880 DEBUG(cerr << "\nReplace.SPU: ");
2881 DEBUG(N->dump(&DAG));
2882 DEBUG(cerr << "\nWith: ");
2883 DEBUG(Result.getNode()->dump(&DAG));
2884 DEBUG(cerr << "\n");
2891 //===----------------------------------------------------------------------===//
2892 // Inline Assembly Support
2893 //===----------------------------------------------------------------------===//
2895 /// getConstraintType - Given a constraint letter, return the type of
2896 /// constraint it is for this target.
2897 SPUTargetLowering::ConstraintType
2898 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2899 if (ConstraintLetter.size() == 1) {
2900 switch (ConstraintLetter[0]) {
2907 return C_RegisterClass;
2910 return TargetLowering::getConstraintType(ConstraintLetter);
2913 std::pair<unsigned, const TargetRegisterClass*>
2914 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2917 if (Constraint.size() == 1) {
2918 // GCC RS6000 Constraint Letters
2919 switch (Constraint[0]) {
2923 return std::make_pair(0U, SPU::R64CRegisterClass);
2924 return std::make_pair(0U, SPU::R32CRegisterClass);
2927 return std::make_pair(0U, SPU::R32FPRegisterClass);
2928 else if (VT == MVT::f64)
2929 return std::make_pair(0U, SPU::R64FPRegisterClass);
2932 return std::make_pair(0U, SPU::GPRCRegisterClass);
2936 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2939 //! Compute used/known bits for a SPU operand
2941 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2945 const SelectionDAG &DAG,
2946 unsigned Depth ) const {
2948 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2951 switch (Op.getOpcode()) {
2953 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2963 case SPUISD::PROMOTE_SCALAR: {
2964 SDValue Op0 = Op.getOperand(0);
2965 MVT Op0VT = Op0.getValueType();
2966 unsigned Op0VTBits = Op0VT.getSizeInBits();
2967 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2968 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2969 KnownOne |= APInt(Op0VTBits, InMask, false);
2973 case SPUISD::LDRESULT:
2974 case SPUISD::EXTRACT_ELT0:
2975 case SPUISD::EXTRACT_ELT0_CHAINED: {
2976 MVT OpVT = Op.getValueType();
2977 unsigned OpVTBits = OpVT.getSizeInBits();
2978 uint64_t InMask = OpVT.getIntegerVTBitMask();
2979 KnownZero |= APInt(OpVTBits, ~InMask, false);
2980 KnownOne |= APInt(OpVTBits, InMask, false);
2985 case EXTRACT_I1_ZEXT:
2986 case EXTRACT_I1_SEXT:
2987 case EXTRACT_I8_ZEXT:
2988 case EXTRACT_I8_SEXT:
2993 case SPUISD::SHLQUAD_L_BITS:
2994 case SPUISD::SHLQUAD_L_BYTES:
2995 case SPUISD::VEC_SHL:
2996 case SPUISD::VEC_SRL:
2997 case SPUISD::VEC_SRA:
2998 case SPUISD::VEC_ROTL:
2999 case SPUISD::VEC_ROTR:
3000 case SPUISD::ROTQUAD_RZ_BYTES:
3001 case SPUISD::ROTQUAD_RZ_BITS:
3002 case SPUISD::ROTBYTES_RIGHT_S:
3003 case SPUISD::ROTBYTES_LEFT:
3004 case SPUISD::ROTBYTES_LEFT_CHAINED:
3005 case SPUISD::SELECT_MASK:
3007 case SPUISD::FPInterp:
3008 case SPUISD::FPRecipEst:
3009 case SPUISD::SEXT32TO64:
3014 // LowerAsmOperandForConstraint
3016 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3017 char ConstraintLetter,
3019 std::vector<SDValue> &Ops,
3020 SelectionDAG &DAG) const {
3021 // Default, for the time being, to the base class handler
3022 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3026 /// isLegalAddressImmediate - Return true if the integer value can be used
3027 /// as the offset of the target addressing mode.
3028 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3029 const Type *Ty) const {
3030 // SPU's addresses are 256K:
3031 return (V > -(1 << 18) && V < (1 << 18) - 1);
3034 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3039 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3040 // The SPU target isn't yet aware of offsets.