1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
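// As a rough orientation: the "preferred slot" is the fixed byte range of a
// 128-bit SPU register in which a scalar of a given type lives (e.g. byte 3
// for an i8, bytes 2-3 for an i16, bytes 0-3 for an i32 or f32); prefslot_byte
// records its starting byte so the load/store lowering below knows how far a
// loaded quadword must be rotated.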
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
address, external symbol, constant pool) or an A-form address.
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
// Custom lower BRCOND so that i1 and i8 conditions are "promoted" to
// i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
// SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FSIN , MVT::f32, Expand);
193 setOperationAction(ISD::FCOS , MVT::f32, Expand);
194 setOperationAction(ISD::FREM , MVT::f32, Expand);
// SPU has no hardware square root instruction, so expand FSQRT
197 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
198 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203 // SPU can do rotate right and left, so legalize it... but customize for i8
204 // because instructions don't exist.
206 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
208 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
209 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
210 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // And SPU needs custom lowering for shift left/right for i64
220 setOperationAction(ISD::SHL, MVT::i64, Custom);
221 setOperationAction(ISD::SRL, MVT::i64, Custom);
222 setOperationAction(ISD::SRA, MVT::i64, Custom);
224 // Custom lower i8, i32 and i64 multiplications
225 setOperationAction(ISD::MUL, MVT::i8, Custom);
226 setOperationAction(ISD::MUL, MVT::i32, Custom);
227 setOperationAction(ISD::MUL, MVT::i64, Custom);
229 // Need to custom handle (some) common i8, i64 math ops
230 setOperationAction(ISD::ADD, MVT::i64, Custom);
231 setOperationAction(ISD::SUB, MVT::i8, Custom);
232 setOperationAction(ISD::SUB, MVT::i64, Custom);
// SPU does not have BSWAP, but it does have i32 support for CTLZ.
235 // CTPOP has to be custom lowered.
236 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
237 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
239 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
240 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
241 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
242 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
244 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
245 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
247 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
249 // SPU has a version of select that implements (a&~c)|(b&c), just like
250 // select ought to work:
251 setOperationAction(ISD::SELECT, MVT::i1, Promote);
252 setOperationAction(ISD::SELECT, MVT::i8, Legal);
253 setOperationAction(ISD::SELECT, MVT::i16, Legal);
254 setOperationAction(ISD::SELECT, MVT::i32, Legal);
255 setOperationAction(ISD::SELECT, MVT::i64, Expand);
257 setOperationAction(ISD::SETCC, MVT::i1, Promote);
258 setOperationAction(ISD::SETCC, MVT::i8, Legal);
259 setOperationAction(ISD::SETCC, MVT::i16, Legal);
260 setOperationAction(ISD::SETCC, MVT::i32, Legal);
261 setOperationAction(ISD::SETCC, MVT::i64, Expand);
// Zero extension and sign extension for i64 have to be custom lowered.
265 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
266 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
267 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
269 // SPU has a legal FP -> signed INT instruction
270 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
273 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
275 // FDIV on SPU requires custom lowering
276 setOperationAction(ISD::FDIV, MVT::f32, Custom);
277 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
279 // SPU has [U|S]INT_TO_FP
280 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
284 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
285 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
286 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
289 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
290 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
291 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
292 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
294 // We cannot sextinreg(i1). Expand to shifts.
295 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
297 // Support label based line numbers.
298 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
299 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
301 // We want to legalize GlobalAddress and ConstantPool nodes into the
302 // appropriate instructions to materialize the address.
303 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
305 MVT VT = (MVT::SimpleValueType)sctype;
307 setOperationAction(ISD::GlobalAddress, VT, Custom);
308 setOperationAction(ISD::ConstantPool, VT, Custom);
309 setOperationAction(ISD::JumpTable, VT, Custom);
312 // RET must be custom lowered, to meet ABI requirements
313 setOperationAction(ISD::RET, MVT::Other, Custom);
315 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
316 setOperationAction(ISD::VASTART , MVT::Other, Custom);
318 // Use the default implementation.
319 setOperationAction(ISD::VAARG , MVT::Other, Expand);
320 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
321 setOperationAction(ISD::VAEND , MVT::Other, Expand);
322 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
323 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
324 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
325 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
327 // Cell SPU has instructions for converting between i64 and fp.
328 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
329 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
331 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
332 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
334 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
335 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
337 // First set operation action for all vector types to expand. Then we
338 // will selectively turn on ones that can be effectively codegen'd.
339 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
341 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
342 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
344 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
346 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
347 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
348 MVT VT = (MVT::SimpleValueType)i;
350 // add/sub are legal for all supported vector VT's.
351 setOperationAction(ISD::ADD , VT, Legal);
352 setOperationAction(ISD::SUB , VT, Legal);
353 // mul has to be custom lowered.
354 setOperationAction(ISD::MUL , VT, Custom);
356 setOperationAction(ISD::AND , VT, Legal);
357 setOperationAction(ISD::OR , VT, Legal);
358 setOperationAction(ISD::XOR , VT, Legal);
359 setOperationAction(ISD::LOAD , VT, Legal);
360 setOperationAction(ISD::SELECT, VT, Legal);
361 setOperationAction(ISD::STORE, VT, Legal);
363 // These operations need to be expanded:
364 setOperationAction(ISD::SDIV, VT, Expand);
365 setOperationAction(ISD::SREM, VT, Expand);
366 setOperationAction(ISD::UDIV, VT, Expand);
367 setOperationAction(ISD::UREM, VT, Expand);
368 setOperationAction(ISD::FDIV, VT, Custom);
370 // Custom lower build_vector, constant pool spills, insert and
371 // extract vector elements:
372 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
373 setOperationAction(ISD::ConstantPool, VT, Custom);
374 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
375 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
376 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
377 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
380 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
381 setOperationAction(ISD::AND, MVT::v16i8, Custom);
382 setOperationAction(ISD::OR, MVT::v16i8, Custom);
383 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
384 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
386 setShiftAmountType(MVT::i32);
387 setSetCCResultContents(ZeroOrOneSetCCResult);
389 setStackPointerRegisterToSaveRestore(SPU::R1);
391 // We have target-specific dag combine patterns for the following nodes:
392 setTargetDAGCombine(ISD::ADD);
393 setTargetDAGCombine(ISD::ZERO_EXTEND);
394 setTargetDAGCombine(ISD::SIGN_EXTEND);
395 setTargetDAGCombine(ISD::ANY_EXTEND);
397 computeRegisterProperties();
401 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
403 if (node_names.empty()) {
404 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
405 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
406 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
407 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
408 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
409 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
410 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
411 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
412 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
413 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
414 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
415 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
416 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
417 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
418 = "SPUISD::EXTRACT_ELT0_CHAINED";
419 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
420 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
421 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
422 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
423 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
424 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
425 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
426 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
427 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
428 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
429 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
430 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
431 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
432 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
433 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
434 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
435 "SPUISD::ROTQUAD_RZ_BYTES";
436 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
437 "SPUISD::ROTQUAD_RZ_BITS";
438 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
439 "SPUISD::ROTBYTES_RIGHT_S";
440 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
441 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
442 "SPUISD::ROTBYTES_LEFT_CHAINED";
443 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
444 "SPUISD::ROTBYTES_LEFT_BITS";
445 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
446 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
447 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
448 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
449 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
450 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
451 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
452 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
453 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
456 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
458 return ((i != node_names.end()) ? i->second : 0);
461 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
462 MVT VT = Op.getValueType();
463 return (VT.isInteger() ? VT : MVT(MVT::i32));
466 //===----------------------------------------------------------------------===//
467 // Calling convention code:
468 //===----------------------------------------------------------------------===//
470 #include "SPUGenCallingConv.inc"
472 //===----------------------------------------------------------------------===//
473 // LowerOperation implementation
474 //===----------------------------------------------------------------------===//
476 /// Aligned load common code for CellSPU
478 \param[in] Op The SelectionDAG load or store operand
479 \param[in] DAG The selection DAG
480 \param[in] ST CellSPU subtarget information structure
481 \param[in,out] alignment Caller initializes this to the load or store node's
482 value from getAlignment(), may be updated while generating the aligned load
483 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
offset (a multiple of 16)
485 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
486 offset of the preferred slot (modulo 16 != 0)
\param[in,out] VT Caller initializes this value type to the load or store
node's loaded or stored value type; may be updated if an i1-extended load or
store is encountered.
490 \param[out] was16aligned true if the base pointer had 16-byte alignment,
491 otherwise false. Can help to determine if the chunk needs to be rotated.
493 Both load and store lowering load a block of data aligned on a 16-byte
494 boundary. This is the common aligned load code shared between both.
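As a rough illustration, assuming the preferred slot of an i32 starts at byte 0:
an i32 load from a 16-byte aligned base plus 4 gives alignOffs = 4, so the
quadword is fetched from alignOffs & ~0xf == 0 and prefSlotOffs becomes
(alignOffs & 0xf) - prefslot_byte == 4, i.e. the caller must rotate the chunk
left by four bytes to land the value in the preferred slot.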
497 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
499 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
500 MVT &VT, bool &was16aligned)
502 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
503 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
504 SDValue basePtr = LSN->getBasePtr();
505 SDValue chain = LSN->getChain();
507 if (basePtr.getOpcode() == ISD::ADD) {
508 SDValue Op1 = basePtr.getNode()->getOperand(1);
510 if (Op1.getOpcode() == ISD::Constant
511 || Op1.getOpcode() == ISD::TargetConstant) {
512 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
514 alignOffs = (int) CN->getZExtValue();
515 prefSlotOffs = (int) (alignOffs & 0xf);
517 // Adjust the rotation amount to ensure that the final result ends up in
518 // the preferred slot:
519 prefSlotOffs -= vtm->prefslot_byte;
520 basePtr = basePtr.getOperand(0);
522 // Loading from memory, can we adjust alignment?
523 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
524 SDValue APtr = basePtr.getOperand(0);
525 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
526 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
527 alignment = GSDN->getGlobal()->getAlignment();
532 prefSlotOffs = -vtm->prefslot_byte;
534 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
535 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
536 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
537 prefSlotOffs = (int) (alignOffs & 0xf);
538 prefSlotOffs -= vtm->prefslot_byte;
539 basePtr = DAG.getRegister(SPU::R1, VT);
542 prefSlotOffs = -vtm->prefslot_byte;
545 if (alignment == 16) {
546 // Realign the base pointer as a D-Form address:
547 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
548 basePtr = DAG.getNode(ISD::ADD, PtrVT,
550 DAG.getConstant((alignOffs & ~0xf), PtrVT));
553 // Emit the vector load:
555 return DAG.getLoad(MVT::v16i8, chain, basePtr,
556 LSN->getSrcValue(), LSN->getSrcValueOffset(),
557 LSN->isVolatile(), 16);
560 // Unaligned load or we're using the "large memory" model, which means that
561 // we have to be very pessimistic:
562 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
563 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
564 DAG.getConstant(0, PtrVT));
568 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
569 DAG.getConstant((alignOffs & ~0xf), PtrVT));
570 was16aligned = false;
571 return DAG.getLoad(MVT::v16i8, chain, basePtr,
572 LSN->getSrcValue(), LSN->getSrcValueOffset(),
573 LSN->isVolatile(), 16);
576 /// Custom lower loads for CellSPU
578 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
579 within a 16-byte block, we have to rotate to extract the requested element.
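Sketching the unindexed path below: the scalar load is rewritten as a v16i8
load of the containing quadword, an optional SPUISD::ROTBYTES_LEFT_CHAINED to
bring the addressed element into the preferred slot, a BIT_CONVERT to the
matching vector type, and an EXTRACT_ELT0_CHAINED / LDRESULT pair that yields
the scalar result together with its chain.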
582 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
583 LoadSDNode *LN = cast<LoadSDNode>(Op);
584 SDValue the_chain = LN->getChain();
585 MVT VT = LN->getMemoryVT();
586 MVT OpVT = Op.getNode()->getValueType(0);
587 ISD::LoadExtType ExtType = LN->getExtensionType();
588 unsigned alignment = LN->getAlignment();
591 switch (LN->getAddressingMode()) {
592 case ISD::UNINDEXED: {
AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
598 if (result.getNode() == 0)
601 the_chain = result.getValue(1);
602 // Rotate the chunk if necessary
605 if (rotamt != 0 || !was16aligned) {
606 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
611 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
613 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
614 LoadSDNode *LN1 = cast<LoadSDNode>(result);
615 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
616 DAG.getConstant(rotamt, PtrVT));
619 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
620 the_chain = result.getValue(1);
623 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
625 MVT vecVT = MVT::v16i8;
627 // Convert the loaded v16i8 vector to the appropriate vector type
628 // specified by the operand:
631 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
633 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
636 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
637 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
638 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
639 the_chain = result.getValue(1);
641 // Handle the sign and zero-extending loads for i1 and i8:
644 if (ExtType == ISD::SEXTLOAD) {
645 NewOpC = (OpVT == MVT::i1
646 ? SPUISD::EXTRACT_I1_SEXT
647 : SPUISD::EXTRACT_I8_SEXT);
649 assert(ExtType == ISD::ZEXTLOAD);
650 NewOpC = (OpVT == MVT::i1
651 ? SPUISD::EXTRACT_I1_ZEXT
652 : SPUISD::EXTRACT_I8_ZEXT);
655 result = DAG.getNode(NewOpC, OpVT, result);
658 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
659 SDValue retops[2] = {
664 result = DAG.getNode(SPUISD::LDRESULT, retvts,
665 retops, sizeof(retops) / sizeof(retops[0]));
672 case ISD::LAST_INDEXED_MODE:
673 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
675 cerr << (unsigned) LN->getAddressingMode() << "\n";
683 /// Custom lower stores for CellSPU
685 All CellSPU stores are aligned to 16-byte boundaries, so for elements
686 within a 16-byte block, we have to generate a shuffle to insert the
687 requested element into its place, then store the resulting block.
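In rough terms this is a read-modify-write sequence: the containing quadword is
fetched via AlignedLoad, SPUISD::INSERT_MASK materializes the insertion control
word (eventually one of the C*D instructions), SPUISD::SHUFB merges the scalar
into the loaded quadword, and the merged quadword is stored back.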
690 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
691 StoreSDNode *SN = cast<StoreSDNode>(Op);
692 SDValue Value = SN->getValue();
693 MVT VT = Value.getValueType();
694 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
695 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
696 unsigned alignment = SN->getAlignment();
698 switch (SN->getAddressingMode()) {
699 case ISD::UNINDEXED: {
700 int chunk_offset, slot_offset;
703 // The vector type we really want to load from the 16-byte chunk, except
704 // in the case of MVT::i1, which has to be v16i8.
705 MVT vecVT, stVecVT = MVT::v16i8;
708 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
709 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
711 SDValue alignLoadVec =
712 AlignedLoad(Op, DAG, ST, SN, alignment,
713 chunk_offset, slot_offset, VT, was16aligned);
715 if (alignLoadVec.getNode() == 0)
718 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
719 SDValue basePtr = LN->getBasePtr();
720 SDValue the_chain = alignLoadVec.getValue(1);
721 SDValue theValue = SN->getValue();
725 && (theValue.getOpcode() == ISD::AssertZext
726 || theValue.getOpcode() == ISD::AssertSext)) {
// Drill down and get the value for zero- and sign-extended quantities:
729 theValue = theValue.getOperand(0);
734 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
735 SDValue insertEltPtr;
738 // If the base pointer is already a D-form address, then just create
// a new D-form address with a slot offset and the original base pointer.
740 // Otherwise generate a D-form address with the slot offset relative
741 // to the stack pointer, which is always aligned.
742 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
743 DEBUG(basePtr.getNode()->dump(&DAG));
746 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
747 (basePtr.getOpcode() == ISD::ADD
748 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
749 insertEltPtr = basePtr;
751 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
754 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
755 result = DAG.getNode(SPUISD::SHUFB, vecVT,
756 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
758 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
760 result = DAG.getStore(the_chain, result, basePtr,
761 LN->getSrcValue(), LN->getSrcValueOffset(),
762 LN->isVolatile(), LN->getAlignment());
771 case ISD::LAST_INDEXED_MODE:
772 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
774 cerr << (unsigned) SN->getAddressingMode() << "\n";
782 /// Generate the address of a constant pool entry.
784 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
785 MVT PtrVT = Op.getValueType();
786 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
787 Constant *C = CP->getConstVal();
788 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
789 SDValue Zero = DAG.getConstant(0, PtrVT);
790 const TargetMachine &TM = DAG.getTarget();
792 if (TM.getRelocationModel() == Reloc::Static) {
793 if (!ST->usingLargeMem()) {
794 // Just return the SDValue with the constant pool address in it.
795 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
797 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
798 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
799 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
804 "LowerConstantPool: Relocation model other than static"
810 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
811 MVT PtrVT = Op.getValueType();
812 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
813 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
814 SDValue Zero = DAG.getConstant(0, PtrVT);
815 const TargetMachine &TM = DAG.getTarget();
817 if (TM.getRelocationModel() == Reloc::Static) {
818 if (!ST->usingLargeMem()) {
819 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
821 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
822 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
823 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
828 "LowerJumpTable: Relocation model other than static not supported.");
833 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
834 MVT PtrVT = Op.getValueType();
835 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
836 GlobalValue *GV = GSDN->getGlobal();
837 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
838 const TargetMachine &TM = DAG.getTarget();
839 SDValue Zero = DAG.getConstant(0, PtrVT);
841 if (TM.getRelocationModel() == Reloc::Static) {
842 if (!ST->usingLargeMem()) {
843 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
845 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
846 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
847 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
850 cerr << "LowerGlobalAddress: Relocation model other than static not "
859 //! Custom lower i64 integer constants
861 This code inserts all of the necessary juggling that needs to occur to load
862 a 64-bit constant into a register.
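Illustratively, an i64 constant K becomes EXTRACT_ELT0 of a v2i64 BUILD_VECTOR
of <K, K>; the splatted vector form is something the instruction selector can
already materialize with its immediate forms or, failing that, a constant pool
load.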
865 LowerConstant(SDValue Op, SelectionDAG &DAG) {
866 MVT VT = Op.getValueType();
867 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
869 if (VT == MVT::i64) {
870 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
871 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
872 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
874 cerr << "LowerConstant: unhandled constant type "
884 //! Custom lower double precision floating point constants
886 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
887 MVT VT = Op.getValueType();
888 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
891 "LowerConstantFP: Node is not ConstantFPSDNode");
893 if (VT == MVT::f64) {
894 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
895 return DAG.getNode(ISD::BIT_CONVERT, VT,
896 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
902 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
904 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
906 SDValue Cond = Op.getOperand(1);
907 MVT CondVT = Cond.getValueType();
910 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
911 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
912 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
914 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
917 return SDValue(); // Unchanged
921 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
923 MachineFunction &MF = DAG.getMachineFunction();
924 MachineFrameInfo *MFI = MF.getFrameInfo();
925 MachineRegisterInfo &RegInfo = MF.getRegInfo();
926 SmallVector<SDValue, 48> ArgValues;
927 SDValue Root = Op.getOperand(0);
928 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
930 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
931 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
933 unsigned ArgOffset = SPUFrameInfo::minStackSize();
934 unsigned ArgRegIdx = 0;
935 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
937 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
939 // Add DAG nodes to load the arguments or copy them out of registers.
940 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
941 ArgNo != e; ++ArgNo) {
942 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
943 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
946 if (ArgRegIdx < NumArgRegs) {
947 const TargetRegisterClass *ArgRegClass;
949 switch (ObjectVT.getSimpleVT()) {
951 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
952 << ObjectVT.getMVTString()
957 ArgRegClass = &SPU::R8CRegClass;
960 ArgRegClass = &SPU::R16CRegClass;
963 ArgRegClass = &SPU::R32CRegClass;
966 ArgRegClass = &SPU::R64CRegClass;
969 ArgRegClass = &SPU::R32FPRegClass;
972 ArgRegClass = &SPU::R64FPRegClass;
980 ArgRegClass = &SPU::VECREGRegClass;
985 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
986 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
987 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
990 // We need to load the argument to a virtual register if we determined
991 // above that we ran out of physical registers of the appropriate type
992 // or we're forced to do vararg
993 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
994 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
995 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
996 ArgOffset += StackSlotSize;
999 ArgValues.push_back(ArgVal);
1001 Root = ArgVal.getOperand(0);
1006 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1007 // We will spill (79-3)+1 registers to the stack
1008 SmallVector<SDValue, 79-3+1> MemOps;
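// Illustrative reading of the loop below: every remaining argument register is
// spilled to its own fixed 16-byte stack slot, and the resulting stores are
// tied together with a TokenFactor so that none of them is dropped; va_arg can
// then walk the spilled registers in memory.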
1010 // Create the frame slot
1012 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1013 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1014 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1015 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1016 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1017 Root = Store.getOperand(0);
1018 MemOps.push_back(Store);
1020 // Increment address by stack slot size for the next stored argument
1021 ArgOffset += StackSlotSize;
1023 if (!MemOps.empty())
1024 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1027 ArgValues.push_back(Root);
1029 // Return the new list of results.
1030 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1034 /// isLSAAddress - Return the immediate to use if the specified
/// value is representable as an LSA address.
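/// For example (illustrative values): a callee address of 0x1234 has its low
/// two bits clear and fits in the signed 18-bit range, so the returned node
/// holds the immediate 0x1234 >> 2 == 0x48d; a misaligned or out-of-range
/// address yields null and the callee is left untouched.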
1036 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1037 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1040 int Addr = C->getZExtValue();
1041 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1042 (Addr << 14 >> 14) != Addr)
1043 return 0; // Top 14 bits have to be sext of immediate.
1045 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1050 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1051 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1052 SDValue Chain = TheCall->getChain();
1053 SDValue Callee = TheCall->getCallee();
1054 unsigned NumOps = TheCall->getNumArgs();
1055 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1056 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1057 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1059 // Handy pointer type
1060 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1062 // Accumulate how many bytes are to be pushed on the stack, including the
1063 // linkage area, and parameter passing area. According to the SPU ABI,
1064 // we minimally need space for [LR] and [SP]
1065 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1067 // Set up a copy of the stack pointer for use loading and storing any
// arguments that may not fit in the registers available for argument passing.
1070 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
// Figure out which arguments are going to go in registers, and which in memory.
1074 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1075 unsigned ArgRegIdx = 0;
1077 // Keep track of registers passing arguments
1078 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1079 // And the arguments passed on the stack
1080 SmallVector<SDValue, 8> MemOpChains;
1082 for (unsigned i = 0; i != NumOps; ++i) {
1083 SDValue Arg = TheCall->getArg(i);
1085 // PtrOff will be used to store the current argument to the stack if a
1086 // register cannot be found for it.
1087 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1088 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1090 switch (Arg.getValueType().getSimpleVT()) {
1091 default: assert(0 && "Unexpected ValueType for argument!");
1095 if (ArgRegIdx != NumArgRegs) {
1096 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1098 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1099 ArgOffset += StackSlotSize;
1104 if (ArgRegIdx != NumArgRegs) {
1105 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1107 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1108 ArgOffset += StackSlotSize;
1115 if (ArgRegIdx != NumArgRegs) {
1116 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1118 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1119 ArgOffset += StackSlotSize;
1125 // Update number of stack bytes actually used, insert a call sequence start
1126 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1127 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1130 if (!MemOpChains.empty()) {
1131 // Adjust the stack pointer for the stack arguments.
1132 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1133 &MemOpChains[0], MemOpChains.size());
1136 // Build a sequence of copy-to-reg nodes chained together with token chain
1137 // and flag operands which copy the outgoing args into the appropriate regs.
1139 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1140 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1142 InFlag = Chain.getValue(1);
1145 SmallVector<SDValue, 8> Ops;
1146 unsigned CallOpc = SPUISD::CALL;
1148 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1149 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1150 // node so that legalize doesn't hack it.
1151 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1152 GlobalValue *GV = G->getGlobal();
1153 MVT CalleeVT = Callee.getValueType();
1154 SDValue Zero = DAG.getConstant(0, PtrVT);
1155 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1157 if (!ST->usingLargeMem()) {
1158 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1159 // style calls, otherwise, external symbols are BRASL calls. This assumes
1160 // that declared/defined symbols are in the same compilation unit and can
1161 // be reached through PC-relative jumps.
// This may be an unsafe assumption for JIT and really large compilation units.
1166 if (GV->isDeclaration()) {
1167 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1169 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1172 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1174 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1176 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1177 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1178 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1179 // If this is an absolute destination address that appears to be a legal
1180 // local store address, use the munged value.
1181 Callee = SDValue(Dest, 0);
1184 Ops.push_back(Chain);
1185 Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are known live into the call.
1189 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1190 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1191 RegsToPass[i].second.getValueType()));
1193 if (InFlag.getNode())
1194 Ops.push_back(InFlag);
1195 // Returns a chain and a flag for retval copy to use.
1196 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1197 &Ops[0], Ops.size());
1198 InFlag = Chain.getValue(1);
1200 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1201 DAG.getIntPtrConstant(0, true), InFlag);
1202 if (TheCall->getValueType(0) != MVT::Other)
1203 InFlag = Chain.getValue(1);
1205 SDValue ResultVals[3];
1206 unsigned NumResults = 0;
1208 // If the call has results, copy the values out of the ret val registers.
1209 switch (TheCall->getValueType(0).getSimpleVT()) {
1210 default: assert(0 && "Unexpected ret value!");
1211 case MVT::Other: break;
1213 if (TheCall->getValueType(1) == MVT::i32) {
1214 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1215 ResultVals[0] = Chain.getValue(0);
1216 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1217 Chain.getValue(2)).getValue(1);
1218 ResultVals[1] = Chain.getValue(0);
1221 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1222 ResultVals[0] = Chain.getValue(0);
1227 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1228 ResultVals[0] = Chain.getValue(0);
1233 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1234 InFlag).getValue(1);
1235 ResultVals[0] = Chain.getValue(0);
1243 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1244 InFlag).getValue(1);
1245 ResultVals[0] = Chain.getValue(0);
1250 // If the function returns void, just return the chain.
1251 if (NumResults == 0)
1254 // Otherwise, merge everything together with a MERGE_VALUES node.
1255 ResultVals[NumResults++] = Chain;
1256 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1257 return Res.getValue(Op.getResNo());
1261 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1262 SmallVector<CCValAssign, 16> RVLocs;
1263 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1264 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1265 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1266 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1268 // If this is the first return lowered for this function, add the regs to the
1269 // liveout set for the function.
1270 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1271 for (unsigned i = 0; i != RVLocs.size(); ++i)
1272 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1275 SDValue Chain = Op.getOperand(0);
1278 // Copy the result values into the output registers.
1279 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1280 CCValAssign &VA = RVLocs[i];
1281 assert(VA.isRegLoc() && "Can only return in registers!");
1282 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1283 Flag = Chain.getValue(1);
1287 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1289 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1293 //===----------------------------------------------------------------------===//
1294 // Vector related lowering:
1295 //===----------------------------------------------------------------------===//
1297 static ConstantSDNode *
1298 getVecImm(SDNode *N) {
1299 SDValue OpVal(0, 0);
1301 // Check to see if this buildvec has a single non-undef value in its elements.
1302 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1303 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1304 if (OpVal.getNode() == 0)
1305 OpVal = N->getOperand(i);
1306 else if (OpVal != N->getOperand(i))
1310 if (OpVal.getNode() != 0) {
1311 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
return 0; // All-UNDEF (use an implicit def) or not a constant node.
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
1322 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1324 if (ConstantSDNode *CN = getVecImm(N)) {
1325 uint64_t Value = CN->getZExtValue();
1326 if (ValueType == MVT::i64) {
1327 uint64_t UValue = CN->getZExtValue();
1328 uint32_t upper = uint32_t(UValue >> 32);
1329 uint32_t lower = uint32_t(UValue);
1332 Value = Value >> 32;
1334 if (Value <= 0x3ffff)
1335 return DAG.getTargetConstant(Value, ValueType);
1341 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
1344 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1346 if (ConstantSDNode *CN = getVecImm(N)) {
1347 int64_t Value = CN->getSExtValue();
1348 if (ValueType == MVT::i64) {
1349 uint64_t UValue = CN->getZExtValue();
1350 uint32_t upper = uint32_t(UValue >> 32);
1351 uint32_t lower = uint32_t(UValue);
1354 Value = Value >> 32;
1356 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1357 return DAG.getTargetConstant(Value, ValueType);
1364 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
1367 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1369 if (ConstantSDNode *CN = getVecImm(N)) {
1370 int64_t Value = CN->getSExtValue();
1371 if (ValueType == MVT::i64) {
1372 uint64_t UValue = CN->getZExtValue();
1373 uint32_t upper = uint32_t(UValue >> 32);
1374 uint32_t lower = uint32_t(UValue);
1377 Value = Value >> 32;
1379 if (isS10Constant(Value))
1380 return DAG.getTargetConstant(Value, ValueType);
1386 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
1393 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1395 if (ConstantSDNode *CN = getVecImm(N)) {
1396 int Value = (int) CN->getZExtValue();
1397 if (ValueType == MVT::i16
1398 && Value <= 0xffff /* truncated from uint64_t */
1399 && ((short) Value >> 8) == ((short) Value & 0xff))
1400 return DAG.getTargetConstant(Value & 0xff, ValueType);
1401 else if (ValueType == MVT::i8
1402 && (Value & 0xff) == Value)
1403 return DAG.getTargetConstant(Value, ValueType);
1409 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
1412 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1414 if (ConstantSDNode *CN = getVecImm(N)) {
1415 uint64_t Value = CN->getZExtValue();
1416 if ((ValueType == MVT::i32
1417 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1418 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1419 return DAG.getTargetConstant(Value >> 16, ValueType);
1425 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1426 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1427 if (ConstantSDNode *CN = getVecImm(N)) {
1428 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1435 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1436 if (ConstantSDNode *CN = getVecImm(N)) {
return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1443 // If this is a vector of constants or undefs, get the bits. A bit in
1444 // UndefBits is set if the corresponding element of the vector is an
1445 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1446 // zero. Return true if this is not an array of constants, false if it is.
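// As a rough example: for a v8i16 build_vector <1, 2, 3, 4, 5, 6, 7, 8>,
// elements 0-3 are packed into VectorBits[0] and elements 4-7 into
// VectorBits[1], each element occupying EltBitSize bits with element 0 in the
// most-significant slot of its half (SlotNo counts down from e/2 - 1).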
1448 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1449 uint64_t UndefBits[2]) {
1450 // Start with zero'd results.
1451 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1453 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1454 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1455 SDValue OpVal = BV->getOperand(i);
unsigned PartNo = i >= e/2; // In the upper 64 bits of the vector?
1458 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1460 uint64_t EltBits = 0;
1461 if (OpVal.getOpcode() == ISD::UNDEF) {
1462 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1463 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1465 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1466 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1467 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1468 const APFloat &apf = CN->getValueAPF();
1469 EltBits = (CN->getValueType(0) == MVT::f32
1470 ? FloatToBits(apf.convertToFloat())
1471 : DoubleToBits(apf.convertToDouble()));
1473 // Nonconstant element.
1477 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1480 //printf("%llx %llx %llx %llx\n",
1481 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1485 /// If this is a splat (repetition) of a value across the whole vector, return
1486 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1487 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1488 /// SplatSize = 1 byte.
1489 static bool isConstantSplat(const uint64_t Bits128[2],
1490 const uint64_t Undef128[2],
1492 uint64_t &SplatBits, uint64_t &SplatUndef,
1494 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1495 // the same as the lower 64-bits, ignoring undefs.
1496 uint64_t Bits64 = Bits128[0] | Bits128[1];
1497 uint64_t Undef64 = Undef128[0] & Undef128[1];
1498 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1499 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1500 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1501 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1503 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1504 if (MinSplatBits < 64) {
// Check that the top 32 bits are the same as the lower 32 bits, ignoring undefs.
1508 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1509 if (MinSplatBits < 32) {
// If the top 16 bits are the same as the lower 16 bits (ignoring undefs),
// look for an even smaller splat; otherwise this is an i32 splat.
1513 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1514 if (MinSplatBits < 16) {
// If the top 8 bits are the same as the lower 8 bits (ignoring undefs),
// this is an i8 splat; otherwise it is an i16 splat.
1517 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1518 == ((Bits16 >> 8) & ~Undef16)) {
// The two bytes match: we have an 8-bit splat.
1520 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1521 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1527 SplatUndef = Undef16;
1534 SplatUndef = Undef32;
1540 SplatBits = Bits128[0];
1541 SplatUndef = Undef128[0];
1547 return false; // Can't be a splat if two pieces don't match.
1550 // If this is a case we can't handle, return null and let the default
1551 // expansion code take care of it. If we CAN select this case, and if it
1552 // selects to a single instruction, return Op. Otherwise, if we can codegen
1553 // this case more efficiently than a constant pool load, lower it to the
1554 // sequence of ops that should be used.
1555 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1556 MVT VT = Op.getValueType();
1557 // If this is a vector of constants or undefs, get the bits. A bit in
1558 // UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are zero.
1561 uint64_t VectorBits[2];
1562 uint64_t UndefBits[2];
1563 uint64_t SplatBits, SplatUndef;
1565 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1566 || !isConstantSplat(VectorBits, UndefBits,
1567 VT.getVectorElementType().getSizeInBits(),
1568 SplatBits, SplatUndef, SplatSize))
1569 return SDValue(); // Not a constant vector, not a splat.
1571 switch (VT.getSimpleVT()) {
1574 uint32_t Value32 = SplatBits;
1575 assert(SplatSize == 4
1576 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1577 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1578 SDValue T = DAG.getConstant(Value32, MVT::i32);
1579 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1580 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1584 uint64_t f64val = SplatBits;
1585 assert(SplatSize == 8
1586 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1587 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1588 SDValue T = DAG.getConstant(f64val, MVT::i64);
1589 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1590 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1594 // 8-bit constants have to be expanded to 16-bits
1595 unsigned short Value16 = SplatBits | (SplatBits << 8);
1597 for (int i = 0; i < 8; ++i)
1598 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1599 return DAG.getNode(ISD::BIT_CONVERT, VT,
1600 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1603 unsigned short Value16;
1605 Value16 = (unsigned short) (SplatBits & 0xffff);
1607 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1608 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1610 for (int i = 0; i < 8; ++i) Ops[i] = T;
1611 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1614 unsigned int Value = SplatBits;
1615 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1616 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1619 uint64_t val = SplatBits;
1620 uint32_t upper = uint32_t(val >> 32);
1621 uint32_t lower = uint32_t(val);
1623 if (upper == lower) {
// Magic constant that can be matched by IL, ILA, et al.
1625 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1626 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1630 SmallVector<SDValue, 16> ShufBytes;
1632 bool upper_special, lower_special;
1634 // NOTE: This code creates common-case shuffle masks that can be easily
1635 // detected as common expressions. It is not attempting to create highly
1636 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
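// For orientation (SHUFB semantics): a control byte of the form 10xxxxxx
// selects a constant 0x00, 110xxxxx selects 0xff, and 111xxxxx selects 0x80.
// That is why halves equal to 0, 0xffffffff or 0x80000000 can be encoded
// directly into the mask (hence the 0x80 and 0xe0 values below) instead of
// materializing a second vector operand.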
1638 // Detect if the upper or lower half is a special shuffle mask pattern:
1639 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1640 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1642 // Create lower vector if not a special pattern
1643 if (!lower_special) {
1644 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1645 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1646 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1647 LO32C, LO32C, LO32C, LO32C));
1650 // Create upper vector if not a special pattern
1651 if (!upper_special) {
1652 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1653 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1654 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1655 HI32C, HI32C, HI32C, HI32C));
1658 // If either upper or lower are special, then the two input operands are
1659 // the same (basically, one of them is a "don't care")
1664 if (lower_special && upper_special) {
1665 // Unhappy situation... both upper and lower are special, so punt with
1666 // a target constant:
1667 SDValue Zero = DAG.getConstant(0, MVT::i32);
1668 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1672 for (int i = 0; i < 4; ++i) {
1674 for (int j = 0; j < 4; ++j) {
1676 bool process_upper, process_lower;
1678 process_upper = (upper_special && (i & 1) == 0);
1679 process_lower = (lower_special && (i & 1) == 1);
1681 if (process_upper || process_lower) {
1682 if ((process_upper && upper == 0)
1683 || (process_lower && lower == 0))
1685 else if ((process_upper && upper == 0xffffffff)
1686 || (process_lower && lower == 0xffffffff))
1688 else if ((process_upper && upper == 0x80000000)
1689 || (process_lower && lower == 0x80000000))
1690 val |= (j == 0 ? 0xe0 : 0x80);
1692 val |= i * 4 + j + ((i & 1) * 16);
1695 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1698 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1699 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1700 &ShufBytes[0], ShufBytes.size()));
1708 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1709 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1710 /// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1712 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1713 /// In either case, the net result is going to eventually invoke SHUFB to
1714 /// permute/shuffle the bytes from V1 and V2.
1716 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1717 /// control word for byte/halfword/word insertion. This takes care of a single
1718 /// element move from V2 into V1.
1720 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1721 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1722 SDValue V1 = Op.getOperand(0);
1723 SDValue V2 = Op.getOperand(1);
1724 SDValue PermMask = Op.getOperand(2);
1726 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1728 // If we have a single element being moved from V1 to V2, this can be handled
1729 // using the C*[DX] compute mask instructions, but the vector elements have
1730 // to be monotonically increasing with one exception element.
1731 MVT EltVT = V1.getValueType().getVectorElementType();
1732 unsigned EltsFromV2 = 0;
1734 unsigned V2EltIdx0 = 0;
1735 unsigned CurrElt = 0;
1736 bool monotonic = true;
1737 if (EltVT == MVT::i8)
1739 else if (EltVT == MVT::i16)
1741 else if (EltVT == MVT::i32)
1744 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1746 for (unsigned i = 0, e = PermMask.getNumOperands();
1747 EltsFromV2 <= 1 && monotonic && i != e;
1750 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1753 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1755 if (SrcElt >= V2EltIdx0) {
1757 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1758 } else if (CurrElt != SrcElt) {
1765 if (EltsFromV2 == 1 && monotonic) {
1766 // Compute mask and shuffle
1767 MachineFunction &MF = DAG.getMachineFunction();
1768 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1769 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1770 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1771 // Initialize temporary register to 0
1772 SDValue InitTempReg =
1773 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1774 // Copy register's contents as index in INSERT_MASK:
1775 SDValue ShufMaskOp =
1776 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1777 DAG.getTargetConstant(V2Elt, MVT::i32),
1778 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1779 // Use shuffle mask in SHUFB synthetic instruction:
1780 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1782 // Convert the SHUFFLE_VECTOR mask's input element units to the
1784 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1786 SmallVector<SDValue, 16> ResultMask;
1787 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1789 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1792 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1794 for (unsigned j = 0; j < BytesPerElement; ++j) {
1795 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1800 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1801 &ResultMask[0], ResultMask.size());
1802 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
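
// Illustrative sketch of the property checked above, written over a plain
// array of element indices (indices >= NumElts select V2). The helper name
// is hypothetical and not part of this file:
//
//   bool isMonotonicWithSingleV2Insert(const unsigned *Mask, unsigned NumElts) {
//     unsigned EltsFromV2 = 0;
//     for (unsigned i = 0; i != NumElts; ++i) {
//       if (Mask[i] >= NumElts)      // at most one element may come from V2
//         ++EltsFromV2;
//       else if (Mask[i] != i)       // every V1 element must stay in place
//         return false;
//     }
//     return EltsFromV2 == 1;        // exactly one "exception" element
//   }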
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.
    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);

static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getValueType().getSimpleVT()) {
    cerr << "CellSPU: Unknown vector multiplication, got "
         << Op.getValueType().getMVTString()

    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
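
    // Per 32-bit lane, writing a = (aH << 16) + aL and b = (bH << 16) + bL:
    //   mpyu(a, b) = aL * bL
    //   mpyh(a, b) = (aH * bL) << 16
    //   mpyh(b, a) = (bH * aL) << 16
    // and their sum equals a * b modulo 2^32, because the remaining aH * bH
    // partial product lies entirely above bit 31. (Sketch of the identity the
    // three nodes below implement.)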
    SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);

    // Multiply two v8i16 vectors (pipeline friendly version):
    // a) multiply lower halves, mask off the upper 16 bits of each 32-bit product
    // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
    // c) Use SELB to select upper and lower halves from the intermediate results
    //
    // NOTE: We really want to move the SELECT_MASK to earlier to actually get
    // the dual-issue. This code does manage to do this, even if it's a little
    // on the wacky side.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    SDValue Chain = Op.getOperand(0);
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i16)));

      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDValue HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
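
    // Sketch of the merge performed below, per 32-bit word (byte 0 is the
    // most significant; the even halfword occupies bytes 0-1, the odd
    // halfword bytes 2-3):
    //   mpy(rA, rB)   -> 32-bit product of the odd (low) halfwords
    //   mpyhh(rA, rB) -> 32-bit product of the even (high) halfwords
    //   shl by 16     -> moves the even product's low 16 bits into bytes 0-1
    //   selb + fsmb(0xcccc) -> takes bytes 0-1 from the shifted even product
    //                          and bytes 2-3 from the mpy result
    // leaving the truncated 16-bit product in every halfword lane.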
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));

    // This M00sE is N@stI! (apologies to Monty Python)
    //
    // SPU doesn't know how to do any 8-bit multiplication, so the solution
    // is to break it all apart, sign extend, and reassemble the various
    // intermediate products.
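    //
    // (Per element the goal is simply out[i] = (unsigned char) (a[i] * b[i]);
    // the gymnastics below synthesize that from 16-bit multiplies.)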
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue c8 = DAG.getConstant(8, MVT::i32);
    SDValue c16 = DAG.getConstant(16, MVT::i32);

      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
    SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i16));

    SDValue LoProdParts =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getNode(SPUISD::SELB, MVT::v8i16,
                              LLProd, LHProd, FSMBmask));

    SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);

      DAG.getNode(ISD::AND, MVT::v4i32,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rBH, c8)));

      DAG.getNode(SPUISD::SELB, MVT::v8i16,
                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),

      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,

static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);
  MVT VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // (Floating Interpolate (FP Reciprocal Estimate B))
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What does the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
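
// Scalar model of the division sequence above: an initial reciprocal estimate
// followed by one correction step. Illustrative sketch only; the helper name
// is hypothetical and 1.0f/b merely stands in for the frest/fi estimate:
//
//   float fdiv32_ref(float a, float b) {
//     float x  = 1.0f / b;           // reciprocal estimate (frest + fi)
//     float q0 = a * x;              // initial quotient
//     return q0 + x * (a - b * q0);  // the correction recovers the lost bits
//   }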
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getZExtValue();

  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);

  // Need to generate shuffle mask and extract:
  int prefslot_begin = -1, prefslot_end = -1;
  int elt_byte = EltNo * VT.getSizeInBits() / 8;

  switch (VT.getSimpleVT()) {
    assert(false && "Invalid value type!");
    prefslot_begin = prefslot_end = 3;
    prefslot_begin = 2; prefslot_end = 3;
    prefslot_begin = 0; prefslot_end = 3;
    prefslot_begin = 0; prefslot_end = 7;

  assert(prefslot_begin != -1 && prefslot_end != -1 &&
         "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
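
  // For example, extracting element 2 of a v4i32 gives elt_byte == 8 and a
  // preferred slot spanning bytes 0-3, so the loop below produces the mask
  //   { 8, 9, 10, 11,  8, 9, 10, 11,  8, 9, 10, 11,  8, 9, 10, 11 }
  // which replicates element 2 into every word slot, including the preferred
  // one read by EXTRACT_ELT0.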
  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // rest (undefined)
    unsigned int mask_val;

    if (i <= prefslot_end) {
        ((i < prefslot_begin)
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];

  SDValue ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));

static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);

    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        DAG.getConstant(CN->getZExtValue(),

static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
    assert(0 && "Unhandled i8 math operator");

    // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
    // the result back down to 8 bits.
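    // (Widening is sound: the low 8 bits of the 16-bit difference equal the
    // 8-bit difference, i.e. (i8) ((i16) a - (i16) b) == (i8) (a - b).)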
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    SDValue N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i32)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i32, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));

    SDValue N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    SDValue N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

    SDValue N1 = Op.getOperand(1);

    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));

static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
  MVT VT = Op.getValueType();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);

  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");
    DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");

    unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
                       ? SPUISD::ROTBYTES_RIGHT_S
                       : SPUISD::ROTQUAD_RZ_BYTES);
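
    // The idea: promote the i32 into the preferred slot of a quadword, then
    // shift the whole quadword right by four bytes. The value lands in the
    // low half of the preferred doubleword, and the four bytes shifted in
    // (zeros, or copies of the sign when the sign-filling rotate is selected
    // for SIGN_EXTEND) supply the upper half of the resulting i64.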
    SDValue PromoteScalar =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);

    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BIT_CONVERT, VecVT,
                                   DAG.getNode(NewOpc, Op0VecVT,
                                               DAG.getConstant(4, MVT::i32))));

    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors):
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the carry up one register slot
    // once the carry is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));

    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors):
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));

    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
      DAG.getNode(SPUISD::SELB, VecVT,
                  DAG.getConstant(0, VecVT),
                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
                              DAG.getConstant(0xff00ULL, MVT::i16)));
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
                                               MaskLower, ShiftAmtBytes),

    MVT VT = Op.getValueType();
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
                                   Op0, ShiftAmtBytes),

    // Promote Op0 to vector
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftVT = ShiftAmt.getValueType();

    // Negate variable shift amounts
    if (!isa<ConstantSDNode>(ShiftAmt)) {
      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
                             DAG.getConstant(0, ShiftVT), ShiftAmt);

    SDValue UpperHalfSign =
      DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
                                          Op0, DAG.getConstant(31, MVT::i32))));
    SDValue UpperHalfSignMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
    SDValue UpperLowerMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
                  DAG.getConstant(0xff00, MVT::i16));
    SDValue UpperLowerSelect =
      DAG.getNode(SPUISD::SELB, MVT::v2i64,
                  UpperHalfSignMask, Op0, UpperLowerMask);
    SDValue RotateLeftBytes =
      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
                  UpperLowerSelect, ShiftAmt);
    SDValue RotateLeftBits =
      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
                  RotateLeftBytes, ShiftAmt);

    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
//! Lower byte immediate operations for v16i8 vectors:
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;

    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)

      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));

  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.

//! Lower i32 multiplication
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
  switch (VT.getSimpleVT()) {
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << Op.getValueType().getMVTString()

    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
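    // (Same halfword decomposition as the v4i32 case in LowerVectorMUL above:
    // mpyu supplies the low-halfword product and the two mpyh terms supply
    // the shifted cross products, which together equal a*b modulo 2^32.)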
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
 */
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
    assert(false && "Invalid value type!");
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);

    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,

    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),

      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
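
    // Worked example of the accumulation: if CNTB leaves the per-byte counts
    // [c3 c2 c1 c0] in the word (c3 in the most significant byte), then
    //   Sum1 = x + (x >> 16)      has c1+c3 and c0+c2 in its low two bytes,
    //   Sum2 = Sum1 + (Sum1 >> 8) has c0+c1+c2+c3 in its low byte,
    // and the final AND with 0xff extracts that total. Each count is at most
    // 8, so none of the byte-wise sums can carry.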
    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);

/// LowerOperation - Provide custom lowering hooks for some operations.
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();

    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
    return LowerBRCOND(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerRET(Op, DAG, getTargetMachine());

  // i8, i64 math ops:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
      return LowerI8Math(Op, DAG, Opc);
    else if (VT == MVT::i64)
      return LowerI64Math(Op, DAG, Opc);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
      return LowerVectorMUL(Op, DAG);
    else if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
      return LowerMUL(Op, DAG, VT, Opc);

    if (VT == MVT::f32 || VT == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
    // else if (Op.getValueType() == MVT::f64)
    //   return LowerFDIVf64(Op, DAG);
      assert(0 && "Calling FDIV on unsupported MVT");

    return LowerCTPOP(Op, DAG);

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  TargetMachine &TM = getTargetMachine();
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);      // everything has at least one operand
  SDValue Result;                      // Initially, NULL result

  switch (N->getOpcode()) {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op01 = Op0.getOperand(1);
      if (Op01.getOpcode() == ISD::Constant
          || Op01.getOpcode() == ISD::TargetConstant) {
        // (add <const>, (SPUindirect <arg>, <const>)) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
                          Op0.getValueType());

        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With: (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
        return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
                           Op0.getOperand(0), combinedConst);
    } else if (isa<ConstantSDNode>(Op0)
               && Op1.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op11 = Op1.getOperand(1);
      if (Op11.getOpcode() == ISD::Constant
          || Op11.getOpcode() == ISD::TargetConstant) {
        // (add (SPUindirect <arg>, <const>), <const>) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
                          Op0.getValueType());

        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With: (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");

        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
                           Op1.getOperand(0), combinedConst);

  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
        N->getValueType(0) == Op0.getValueType()) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
      DEBUG(cerr << "Replace: ");
      DEBUG(N->dump(&DAG));
      DEBUG(cerr << "\nWith: ");
      DEBUG(Op0.getNode()->dump(&DAG));
      DEBUG(cerr << "\n");

  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith: ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS: {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1)) {
      // Kill degenerate vector shifts:
      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);

      if (CN->getZExtValue() == 0) {

  case SPUISD::PROMOTE_SCALAR: {
    switch (Op0.getOpcode()) {
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
      // <arg>
      // but only if the SPUpromote_scalar and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == N->getValueType(0)) {
    case SPUISD::EXTRACT_ELT0: {
      // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith: ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
      return C_RegisterClass;
  return TargetLowering::getConstraintType(ConstraintLetter);

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      return std::make_pair(0U, SPU::GPRCRegisterClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

//! Compute used/known bits for a SPU operand
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;

  switch (Op.getOpcode()) {
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);

  case SPUISD::PROMOTE_SCALAR: {
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    unsigned Op0VTBits = Op0VT.getSizeInBits();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    KnownZero |= APInt(Op0VTBits, ~InMask, false);
    KnownOne |= APInt(Op0VTBits, InMask, false);

  case SPUISD::LDRESULT:
  case SPUISD::EXTRACT_ELT0:
  case SPUISD::EXTRACT_ELT0_CHAINED: {
    MVT OpVT = Op.getValueType();
    unsigned OpVTBits = OpVT.getSizeInBits();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    KnownZero |= APInt(OpVTBits, ~InMask, false);
    KnownOne |= APInt(OpVTBits, InMask, false);

  case EXTRACT_I1_ZEXT:
  case EXTRACT_I1_SEXT:
  case EXTRACT_I8_ZEXT:
  case EXTRACT_I8_SEXT:

  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS:
  case SPUISD::ROTBYTES_RIGHT_S:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::ROTBYTES_LEFT_CHAINED:
  case SPUISD::SELECT_MASK:
  case SPUISD::FPInterp:
  case SPUISD::FPRecipEst:
  case SPUISD::SEXT32TO64:

// LowerAsmOperandForConstraint
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // The SPU's local store is 256K, so a legal immediate offset must fit in
  // 18 bits:
  return (V > -(1 << 18) && V < (1 << 18) - 1);

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {

SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.