1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// Lazily-populated SPUISD opcode -> printable-name table.
// NOTE(review): file-scope mutable std::map; its population in
// getTargetNodeName() is not thread-safe.
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
// NOTE(review): this listing elides the struct's key member (orig line 42 —
// presumably the MVT 'valtype' that getValueTypeMapEntry() matches against)
// and the array's initializer entries (orig lines 47-56) — confirm against
// the full file.
41 struct valtype_map_s {
// Byte offset of the type's "preferred slot" within a 16-byte SPU register.
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
// Element count via the classic sizeof-array idiom.
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Linear search of valtype_map for the entry describing VT; returns the
// matching entry, or null (0) when VT has no mapping.
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// (Orig lines 65-70 elided in this listing — presumably the loop break,
// closing braces, and a debug-only guard around the diagnostic below.)
// Debug diagnostic emitted when VT has no entry:
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
// address (SPUISD::AFormAddr).
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
// Accept both the generic and the Target* flavor of each address node kind,
// plus the SPU-specific A-form address wrapper.
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
// True when the address is held in a register or is the chained result of a
// previous SPU load (SPUISD::LDRESULT).
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
// Constructor: declares to the SelectionDAG legalizer which operations the
// Cell SPU supports natively (Legal), which must be rewritten (Expand),
// promoted to a wider type (Promote), or lowered by this class (Custom).
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // Initialize libcalls:
134 setLibcallName(RTLIB::MUL_I64, "__muldi3");
136 // SPU has no sign or zero extended loads for i1, i8, i16:
137 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
138 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
139 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
141 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
142 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
143 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
144 setTruncStoreAction(MVT::i8, MVT::i8, Custom);
145 setTruncStoreAction(MVT::i16, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
147 setTruncStoreAction(MVT::i64, MVT::i8, Custom);
148 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
150 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
151 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
152 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
154 // SPU constant load actions are custom lowered:
155 setOperationAction(ISD::Constant, MVT::i64, Custom);
156 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
157 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
159 // SPU's loads and stores have to be custom lowered:
160 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
162 MVT VT = (MVT::SimpleValueType)sctype;
164 setOperationAction(ISD::LOAD, VT, Custom);
165 setOperationAction(ISD::STORE, VT, Custom);
168 // Custom lower BRCOND for i1, i8 to "promote" the result to
169 // i32 and i16, respectively.
170 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
172 // Expand the jumptable branches
173 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
174 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
175 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
177 // SPU has no intrinsics for these particular operations:
178 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
// SPU has no SREM/UREM instructions (comment corrected: the original said
// "PowerPC", a leftover from the PPC backend this file was derived from).
181 setOperationAction(ISD::SREM, MVT::i32, Expand);
182 setOperationAction(ISD::UREM, MVT::i32, Expand);
183 setOperationAction(ISD::SREM, MVT::i64, Expand);
184 setOperationAction(ISD::UREM, MVT::i64, Expand);
186 // We don't support sin/cos/sqrt/fmod
187 setOperationAction(ISD::FSIN , MVT::f64, Expand);
188 setOperationAction(ISD::FCOS , MVT::f64, Expand);
189 setOperationAction(ISD::FREM , MVT::f64, Expand);
190 setOperationAction(ISD::FSIN , MVT::f32, Expand);
191 setOperationAction(ISD::FCOS , MVT::f32, Expand);
192 setOperationAction(ISD::FREM , MVT::f32, Expand);
194 // If we're enabling GP optimizations, use hardware square root
195 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
196 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
198 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
199 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
201 // SPU can do rotate right and left, so legalize it... but customize for i8
202 // because instructions don't exist.
204 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
206 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
207 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
208 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
210 setOperationAction(ISD::ROTL, MVT::i32, Legal);
211 setOperationAction(ISD::ROTL, MVT::i16, Legal);
212 setOperationAction(ISD::ROTL, MVT::i8, Custom);
214 // SPU has no native version of shift left/right for i8
215 setOperationAction(ISD::SHL, MVT::i8, Custom);
216 setOperationAction(ISD::SRL, MVT::i8, Custom);
217 setOperationAction(ISD::SRA, MVT::i8, Custom);
218 // And SPU needs custom lowering for shift left/right for i64
219 setOperationAction(ISD::SHL, MVT::i64, Custom);
220 setOperationAction(ISD::SRL, MVT::i64, Custom);
221 setOperationAction(ISD::SRA, MVT::i64, Custom);
223 // Custom lower i8, i32 and i64 multiplications
224 setOperationAction(ISD::MUL, MVT::i8, Custom);
225 setOperationAction(ISD::MUL, MVT::i32, Custom);
226 setOperationAction(ISD::MUL, MVT::i64, Expand);
228 // Need to custom handle (some) common i8, i64 math ops
229 setOperationAction(ISD::ADD, MVT::i64, Custom);
230 setOperationAction(ISD::SUB, MVT::i8, Custom);
231 setOperationAction(ISD::SUB, MVT::i64, Custom);
233 // SPU does not have BSWAP. It does have i32 support CTLZ.
234 // CTPOP has to be custom lowered.
235 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
236 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
238 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
239 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
240 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
241 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
243 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
244 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
246 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
248 // SPU has a version of select that implements (a&~c)|(b&c), just like
249 // select ought to work:
250 setOperationAction(ISD::SELECT, MVT::i1, Promote);
251 setOperationAction(ISD::SELECT, MVT::i8, Legal);
252 setOperationAction(ISD::SELECT, MVT::i16, Legal);
253 setOperationAction(ISD::SELECT, MVT::i32, Legal);
254 setOperationAction(ISD::SELECT, MVT::i64, Expand);
256 setOperationAction(ISD::SETCC, MVT::i1, Promote);
257 setOperationAction(ISD::SETCC, MVT::i8, Legal);
258 setOperationAction(ISD::SETCC, MVT::i16, Legal);
259 setOperationAction(ISD::SETCC, MVT::i32, Legal);
260 setOperationAction(ISD::SETCC, MVT::i64, Expand);
262 // Zero extension and sign extension for i64 have to be
264 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
265 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
266 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
268 // SPU has a legal FP -> signed INT instruction
269 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
270 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
271 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
274 // FDIV on SPU requires custom lowering
275 setOperationAction(ISD::FDIV, MVT::f32, Custom);
276 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
278 // SPU has [U|S]INT_TO_FP
279 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
280 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
282 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
284 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
285 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
286 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
288 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
289 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
290 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
291 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
293 // We cannot sextinreg(i1). Expand to shifts.
294 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
296 // Support label based line numbers.
297 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
298 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
300 // We want to legalize GlobalAddress and ConstantPool nodes into the
301 // appropriate instructions to materialize the address.
302 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
304 MVT VT = (MVT::SimpleValueType)sctype;
306 setOperationAction(ISD::GlobalAddress, VT, Custom);
307 setOperationAction(ISD::ConstantPool, VT, Custom);
308 setOperationAction(ISD::JumpTable, VT, Custom);
311 // RET must be custom lowered, to meet ABI requirements
312 setOperationAction(ISD::RET, MVT::Other, Custom);
314 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
315 setOperationAction(ISD::VASTART , MVT::Other, Custom);
317 // Use the default implementation.
318 setOperationAction(ISD::VAARG , MVT::Other, Expand);
319 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
320 setOperationAction(ISD::VAEND , MVT::Other, Expand);
321 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
322 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
323 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
324 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
326 // Cell SPU has instructions for converting between i64 and fp.
// NOTE(review): the next two calls repeat settings already made above
// (orig lines 270 and 285) — redundant but harmless.
327 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
328 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
330 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this overrides the earlier Legal setting for FP_TO_UINT/i32
// (orig line 271); the last call wins.
331 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
333 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
334 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
336 // First set operation action for all vector types to expand. Then we
337 // will selectively turn on ones that can be effectively codegen'd.
338 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
339 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
341 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
342 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
345 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
346 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
347 MVT VT = (MVT::SimpleValueType)i;
349 // add/sub are legal for all supported vector VT's.
350 setOperationAction(ISD::ADD , VT, Legal);
351 setOperationAction(ISD::SUB , VT, Legal);
352 // mul has to be custom lowered.
353 setOperationAction(ISD::MUL , VT, Custom);
355 setOperationAction(ISD::AND , VT, Legal);
356 setOperationAction(ISD::OR , VT, Legal);
357 setOperationAction(ISD::XOR , VT, Legal);
358 setOperationAction(ISD::LOAD , VT, Legal);
359 setOperationAction(ISD::SELECT, VT, Legal);
360 setOperationAction(ISD::STORE, VT, Legal);
362 // These operations need to be expanded:
363 setOperationAction(ISD::SDIV, VT, Expand);
364 setOperationAction(ISD::SREM, VT, Expand);
365 setOperationAction(ISD::UDIV, VT, Expand);
366 setOperationAction(ISD::UREM, VT, Expand);
367 setOperationAction(ISD::FDIV, VT, Custom);
369 // Custom lower build_vector, constant pool spills, insert and
370 // extract vector elements:
371 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
372 setOperationAction(ISD::ConstantPool, VT, Custom);
373 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
374 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
375 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
376 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// v16i8 needs special handling beyond the generic vector settings above:
379 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
380 setOperationAction(ISD::AND, MVT::v16i8, Custom);
381 setOperationAction(ISD::OR, MVT::v16i8, Custom);
382 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
383 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
385 setShiftAmountType(MVT::i32);
386 setSetCCResultContents(ZeroOrOneSetCCResult);
388 setStackPointerRegisterToSaveRestore(SPU::R1);
390 // We have target-specific dag combine patterns for the following nodes:
391 setTargetDAGCombine(ISD::ADD);
392 setTargetDAGCombine(ISD::ZERO_EXTEND);
393 setTargetDAGCombine(ISD::SIGN_EXTEND);
394 setTargetDAGCombine(ISD::ANY_EXTEND);
396 computeRegisterProperties();
// Return a printable name for an SPUISD target node opcode, or null if the
// opcode is unknown.  (The return type line, orig 399 — presumably
// 'const char *' — is elided in this listing.)
// NOTE(review): the node_names table is populated lazily on first call;
// this is not thread-safe.
400 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
402 if (node_names.empty()) {
403 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
404 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
405 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
406 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
407 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
408 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
409 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
410 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
411 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
412 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
413 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
414 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
415 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
416 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
417 = "SPUISD::EXTRACT_ELT0_CHAINED";
418 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
419 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
420 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
421 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
422 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
423 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
424 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
425 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
426 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
427 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
428 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
429 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
430 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
431 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
432 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
433 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
434 "SPUISD::ROTQUAD_RZ_BYTES";
435 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
436 "SPUISD::ROTQUAD_RZ_BITS";
437 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
438 "SPUISD::ROTBYTES_RIGHT_S";
439 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
440 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
441 "SPUISD::ROTBYTES_LEFT_CHAINED";
442 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
443 "SPUISD::ROTBYTES_LEFT_BITS";
444 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
445 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
446 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
447 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
448 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
449 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
450 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
451 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
452 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Look the opcode up; unknown opcodes yield a null pointer.
455 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
457 return ((i != node_names.end()) ? i->second : 0);
// SETCC on SPU produces a result in the compared operand's own integer
// type; comparisons of non-integer (FP/vector) operands produce i32.
460 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
461 MVT VT = Op.getValueType();
462 return (VT.isInteger() ? VT : MVT(MVT::i32));
465 //===----------------------------------------------------------------------===//
466 // Calling convention code:
467 //===----------------------------------------------------------------------===//
469 #include "SPUGenCallingConv.inc"
471 //===----------------------------------------------------------------------===//
472 // LowerOperation implementation
473 //===----------------------------------------------------------------------===//
475 /// Aligned load common code for CellSPU
477 \param[in] Op The SelectionDAG load or store operand
478 \param[in] DAG The selection DAG
479 \param[in] ST CellSPU subtarget information structure
480 \param[in,out] alignment Caller initializes this to the load or store node's
481 value from getAlignment(), may be updated while generating the aligned load
482 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
483 offset (divisible by 16, modulo 16 == 0)
484 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
485 offset of the preferred slot (modulo 16 != 0)
486 \param[in,out] VT Caller initializes this value type to the load or store
487 node's loaded or stored value type; may be updated if an i1-extended load or
489 \param[out] was16aligned true if the base pointer had 16-byte alignment,
490 otherwise false. Can help to determine if the chunk needs to be rotated.
492 Both load and store lowering load a block of data aligned on a 16-byte
493 boundary. This is the common aligned load code shared between both.
// NOTE(review): orig line 497 (a parameter between ST and alignment —
// presumably the LSBaseSDNode* 'LSN' dereferenced below) is elided in this
// listing; confirm against the full file.
496 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
498 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
499 MVT &VT, bool &was16aligned)
501 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
502 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
503 SDValue basePtr = LSN->getBasePtr();
504 SDValue chain = LSN->getChain();
// Case 1: base + constant offset.  Peel the constant off and compute the
// rotation needed to land the value in its preferred slot.
506 if (basePtr.getOpcode() == ISD::ADD) {
507 SDValue Op1 = basePtr.getNode()->getOperand(1);
509 if (Op1.getOpcode() == ISD::Constant
510 || Op1.getOpcode() == ISD::TargetConstant) {
511 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
513 alignOffs = (int) CN->getZExtValue();
514 prefSlotOffs = (int) (alignOffs & 0xf);
516 // Adjust the rotation amount to ensure that the final result ends up in
517 // the preferred slot:
518 prefSlotOffs -= vtm->prefslot_byte;
519 basePtr = basePtr.getOperand(0);
521 // Loading from memory, can we adjust alignment?
522 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
523 SDValue APtr = basePtr.getOperand(0);
524 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
525 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
526 alignment = GSDN->getGlobal()->getAlignment();
// Non-constant offset: no known slot offset beyond the type's own.
531 prefSlotOffs = -vtm->prefslot_byte;
// Case 2: frame-index base — offset is the stack slot's position, and the
// base pointer becomes the stack pointer register (R1).
533 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
534 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
535 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
536 prefSlotOffs = (int) (alignOffs & 0xf);
537 prefSlotOffs -= vtm->prefslot_byte;
538 basePtr = DAG.getRegister(SPU::R1, VT);
// Case 3: anything else — assume only the type-preferred slot offset.
541 prefSlotOffs = -vtm->prefslot_byte;
// 16-byte-aligned base: load the containing quadword directly.
544 if (alignment == 16) {
545 // Realign the base pointer as a D-Form address:
546 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
547 basePtr = DAG.getNode(ISD::ADD, PtrVT,
549 DAG.getConstant((alignOffs & ~0xf), PtrVT));
552 // Emit the vector load:
554 return DAG.getLoad(MVT::v16i8, chain, basePtr,
555 LSN->getSrcValue(), LSN->getSrcValueOffset(),
556 LSN->isVolatile(), 16);
559 // Unaligned load or we're using the "large memory" model, which means that
560 // we have to be very pessimistic:
561 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
562 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
563 DAG.getConstant(0, PtrVT));
// Round the offset down to a 16-byte boundary and load the quadword.
567 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
568 DAG.getConstant((alignOffs & ~0xf), PtrVT));
569 was16aligned = false;
570 return DAG.getLoad(MVT::v16i8, chain, basePtr,
571 LSN->getSrcValue(), LSN->getSrcValueOffset(),
572 LSN->isVolatile(), 16);
575 /// Custom lower loads for CellSPU
577 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
578 within a 16-byte block, we have to rotate to extract the requested element.
581 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
582 LoadSDNode *LN = cast<LoadSDNode>(Op);
583 SDValue the_chain = LN->getChain();
584 MVT VT = LN->getMemoryVT();
585 MVT OpVT = Op.getNode()->getValueType(0);
586 ISD::LoadExtType ExtType = LN->getExtensionType();
587 unsigned alignment = LN->getAlignment();
// Only plain (UNINDEXED) loads are custom lowered here; indexed modes fall
// through to the diagnostic at the bottom.
590 switch (LN->getAddressingMode()) {
591 case ISD::UNINDEXED: {
// Load the 16-byte chunk containing the value; 'rotamt' is the byte
// rotation needed to move the value into its preferred slot.
595 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
597 if (result.getNode() == 0)
600 the_chain = result.getValue(1);
601 // Rotate the chunk if necessary
604 if (rotamt != 0 || !was16aligned) {
605 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// For a 16-aligned chunk the rotate amount is an immediate; otherwise it
// is computed from the base pointer (low bits carry the misalignment).
610 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
612 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
613 LoadSDNode *LN1 = cast<LoadSDNode>(result);
614 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
615 DAG.getConstant(rotamt, PtrVT));
618 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
619 the_chain = result.getValue(1);
// Extract the scalar from slot 0 of the (possibly rotated) vector.
622 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
624 MVT vecVT = MVT::v16i8;
626 // Convert the loaded v16i8 vector to the appropriate vector type
627 // specified by the operand:
630 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
632 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
635 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
636 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
637 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
638 the_chain = result.getValue(1);
640 // Handle the sign and zero-extending loads for i1 and i8:
643 if (ExtType == ISD::SEXTLOAD) {
644 NewOpC = (OpVT == MVT::i1
645 ? SPUISD::EXTRACT_I1_SEXT
646 : SPUISD::EXTRACT_I8_SEXT);
648 assert(ExtType == ISD::ZEXTLOAD);
649 NewOpC = (OpVT == MVT::i1
650 ? SPUISD::EXTRACT_I1_ZEXT
651 : SPUISD::EXTRACT_I8_ZEXT);
654 result = DAG.getNode(NewOpC, OpVT, result);
// Wrap the final value + chain in an LDRESULT node so both are returned.
657 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
658 SDValue retops[2] = {
663 result = DAG.getNode(SPUISD::LDRESULT, retvts,
664 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed addressing modes are not supported — report and (in the elided
// code) abort.
671 case ISD::LAST_INDEXED_MODE:
672 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
674 cerr << (unsigned) LN->getAddressingMode() << "\n";
682 /// Custom lower stores for CellSPU
684 All CellSPU stores are aligned to 16-byte boundaries, so for elements
685 within a 16-byte block, we have to generate a shuffle to insert the
686 requested element into its place, then store the resulting block.
689 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
690 StoreSDNode *SN = cast<StoreSDNode>(Op);
691 SDValue Value = SN->getValue();
692 MVT VT = Value.getValueType();
693 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
694 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
695 unsigned alignment = SN->getAlignment();
// Only plain (UNINDEXED) stores are custom lowered; indexed modes fall
// through to the diagnostic at the bottom.
697 switch (SN->getAddressingMode()) {
698 case ISD::UNINDEXED: {
699 int chunk_offset, slot_offset;
702 // The vector type we really want to load from the 16-byte chunk, except
703 // in the case of MVT::i1, which has to be v16i8.
704 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
705 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
// Read-modify-write: load the containing 16-byte chunk first.
707 SDValue alignLoadVec =
708 AlignedLoad(Op, DAG, ST, SN, alignment,
709 chunk_offset, slot_offset, VT, was16aligned);
711 if (alignLoadVec.getNode() == 0)
714 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
715 SDValue basePtr = LN->getBasePtr();
716 SDValue the_chain = alignLoadVec.getValue(1);
717 SDValue theValue = SN->getValue();
// Strip AssertZext/AssertSext wrappers to reach the raw value being stored.
721 && (theValue.getOpcode() == ISD::AssertZext
722 || theValue.getOpcode() == ISD::AssertSext)) {
723 // Drill down and get the value for zero- and sign-extended
725 theValue = theValue.getOperand(0);
// Build the insertion-mask address used by SHUFB below.
730 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
731 SDValue insertEltPtr;
733 // If the base pointer is already a D-form address, then just create
734 // a new D-form address with a slot offset and the orignal base pointer.
735 // Otherwise generate a D-form address with the slot offset relative
736 // to the stack pointer, which is always aligned.
737 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
738 DEBUG(basePtr.getNode()->dump(&DAG));
741 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
742 (basePtr.getOpcode() == ISD::ADD
743 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
744 insertEltPtr = basePtr;
746 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
// Shuffle the scalar into its slot within the loaded chunk, then store the
// whole chunk back.
749 SDValue insertEltOp =
750 DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
751 SDValue vectorizeOp =
752 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
754 result = DAG.getNode(SPUISD::SHUFB, vecVT, vectorizeOp, alignLoadVec,
755 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
757 result = DAG.getStore(the_chain, result, basePtr,
758 LN->getSrcValue(), LN->getSrcValueOffset(),
759 LN->isVolatile(), LN->getAlignment());
// Dead debug-dump code (disabled via '#if 0').
761 #if 0 && defined(NDEBUG)
762 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
763 const SDValue &currentRoot = DAG.getRoot();
766 cerr << "------- CellSPU:LowerStore result:\n";
769 DAG.setRoot(currentRoot);
// Indexed addressing modes are not supported — report and (in the elided
// code) abort.  (Error text fixed: it previously said "LowerLOAD ...
// LoadSDNode", copy-pasted from LowerLOAD above.)
780 case ISD::LAST_INDEXED_MODE:
781 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
783 cerr << (unsigned) SN->getAddressingMode() << "\n";
791 /// Generate the address of a constant pool entry.
// Static relocation only: small-memory model emits an A-form (absolute)
// address; large-memory model materializes Hi/Lo halves combined through an
// indirect address node.  Other relocation models hit the elided assert.
793 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
794 MVT PtrVT = Op.getValueType();
795 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
796 Constant *C = CP->getConstVal();
797 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
798 SDValue Zero = DAG.getConstant(0, PtrVT);
799 const TargetMachine &TM = DAG.getTarget();
801 if (TM.getRelocationModel() == Reloc::Static) {
802 if (!ST->usingLargeMem()) {
803 // Just return the SDValue with the constant pool address in it.
804 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
806 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
807 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
808 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
813 "LowerConstantPool: Relocation model other than static"
// Generate the address of a jump table entry.  Mirrors LowerConstantPool:
// A-form address for small-memory, Hi/Lo + IndirectAddr for large-memory;
// only static relocation is supported.
819 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
820 MVT PtrVT = Op.getValueType();
821 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
822 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
823 SDValue Zero = DAG.getConstant(0, PtrVT);
824 const TargetMachine &TM = DAG.getTarget();
826 if (TM.getRelocationModel() == Reloc::Static) {
827 if (!ST->usingLargeMem()) {
828 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
830 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
831 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
832 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
837 "LowerJumpTable: Relocation model other than static not supported.");
// Generate the address of a global.  Same scheme as LowerConstantPool /
// LowerJumpTable: A-form for small-memory, Hi/Lo + IndirectAddr for
// large-memory; non-static relocation reaches the elided error path below.
842 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
843 MVT PtrVT = Op.getValueType();
844 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
845 GlobalValue *GV = GSDN->getGlobal();
846 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
847 const TargetMachine &TM = DAG.getTarget();
848 SDValue Zero = DAG.getConstant(0, PtrVT);
850 if (TM.getRelocationModel() == Reloc::Static) {
851 if (!ST->usingLargeMem()) {
852 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
854 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
855 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
856 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
859 cerr << "LowerGlobalAddress: Relocation model other than static not "
868 //! Custom lower i64 integer constants
870 This code inserts all of the necessary juggling that needs to occur to load
871 a 64-bit constant into a register.
// Builds a v2i64 splat of the constant, then extracts element 0 — the SPU
// register file is 128-bit, so this materializes the i64 in a register's
// preferred slot.  Non-i64 types hit the elided error path.
874 LowerConstant(SDValue Op, SelectionDAG &DAG) {
875 MVT VT = Op.getValueType();
876 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
878 if (VT == MVT::i64) {
879 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
880 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
881 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
883 cerr << "LowerConstant: unhandled constant type "
893 //! Custom lower double precision floating point constants
// Lower an f64 constant by reinterpreting its bits as an i64, materializing
// that via LowerConstant, and bitcasting the result back to f64.
895 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
896 MVT VT = Op.getValueType();
897 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
900 "LowerConstantFP: Node is not ConstantFPSDNode");
902 if (VT == MVT::f64) {
903 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
904 return DAG.getNode(ISD::BIT_CONVERT, VT,
905 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
911 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
// SPU branch instructions need a wider condition value: zero-extend an i1
// condition to i32 and an i8 condition to i16, rebuilding the BRCOND node.
// Any other condition type is returned unchanged.
913 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
915 SDValue Cond = Op.getOperand(1);
916 MVT CondVT = Cond.getValueType();
919 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
920 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
921 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
923 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
926 return SDValue(); // Unchanged
// Lower incoming formal arguments: the first NumArgRegs arguments arrive in
// the SPU argument registers (one stack-slot-sized slot each); the rest are
// loaded from fixed stack objects. For varargs, all remaining argument
// registers are spilled to the stack so va_arg can walk them.
// NOTE(review): interior source lines (switch cases, else branches, closing
// braces) are elided in this excerpt.
930 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
932 MachineFunction &MF = DAG.getMachineFunction();
933 MachineFrameInfo *MFI = MF.getFrameInfo();
934 MachineRegisterInfo &RegInfo = MF.getRegInfo();
935 SmallVector<SDValue, 48> ArgValues;
936 SDValue Root = Op.getOperand(0);
937 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
939 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
940 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Arguments start just above the minimal stack frame (linkage area).
942 unsigned ArgOffset = SPUFrameInfo::minStackSize();
943 unsigned ArgRegIdx = 0;
944 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
946 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
948 // Add DAG nodes to load the arguments or copy them out of registers.
949 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
950 ArgNo != e; ++ArgNo) {
951 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
952 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
955 if (ArgRegIdx < NumArgRegs) {
956 const TargetRegisterClass *ArgRegClass;
// Map the argument's value type to the SPU register class it lives in.
958 switch (ObjectVT.getSimpleVT()) {
960 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
961 << ObjectVT.getMVTString()
966 ArgRegClass = &SPU::R8CRegClass;
969 ArgRegClass = &SPU::R16CRegClass;
972 ArgRegClass = &SPU::R32CRegClass;
975 ArgRegClass = &SPU::R64CRegClass;
978 ArgRegClass = &SPU::R32FPRegClass;
981 ArgRegClass = &SPU::R64FPRegClass;
989 ArgRegClass = &SPU::VECREGRegClass;
// Copy the incoming physical argument register into a fresh virtual reg.
993 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
994 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
995 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
998 // We need to load the argument to a virtual register if we determined
999 // above that we ran out of physical registers of the appropriate type
1000 // or we're forced to do vararg
1001 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1002 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1003 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1004 ArgOffset += StackSlotSize;
1007 ArgValues.push_back(ArgVal);
// Chain the next argument off this one's token to keep ordering.
1009 Root = ArgVal.getOperand(0);
1014 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1015 // We will spill (79-3)+1 registers to the stack
1016 SmallVector<SDValue, 79-3+1> MemOps;
1018 // Create the frame slot
1020 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1021 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
1022 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
// Spill each remaining argument register (as a v16i8 quadword) to its slot.
1023 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
1024 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
1025 Root = Store.getOperand(0);
1026 MemOps.push_back(Store);
1028 // Increment address by stack slot size for the next stored argument
1029 ArgOffset += StackSlotSize;
1031 if (!MemOps.empty())
1032 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
1035 ArgValues.push_back(Root);
1037 // Return the new list of results.
1038 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1042 /// isLSAAddress - Return the immediate to use if the specified
1043 /// value is representable as a LSA address.
// Returns a constant node holding (Addr >> 2) when Addr is a word-aligned
// 18-bit (sign-extended) constant; otherwise returns 0 (null).
1044 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1045 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1048 int Addr = C->getZExtValue();
1049 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1050 (Addr << 14 >> 14) != Addr)
1051 return 0; // Top 14 bits have to be sext of immediate.
1053 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower an outgoing call: marshal arguments into the SPU argument registers
// (overflow goes to the stack), pick the callee addressing form (PC-relative
// BRSL vs. absolute BRASL vs. indirect for large-memory mode), emit the call
// node, and copy any return values out of R3/R4.
// NOTE(review): interior source lines (switch labels, else branches, closing
// braces) are elided in this excerpt.
1058 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1059 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1060 SDValue Chain = TheCall->getChain();
1061 SDValue Callee = TheCall->getCallee();
1062 unsigned NumOps = TheCall->getNumArgs();
1063 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1064 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1065 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1067 // Handy pointer type
1068 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1070 // Accumulate how many bytes are to be pushed on the stack, including the
1071 // linkage area, and parameter passing area. According to the SPU ABI,
1072 // we minimally need space for [LR] and [SP]
1073 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1075 // Set up a copy of the stack pointer for use loading and storing any
1076 // arguments that may not fit in the registers available for argument
1078 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1080 // Figure out which arguments are going to go in registers, and which in
1082 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1083 unsigned ArgRegIdx = 0;
1085 // Keep track of registers passing arguments
1086 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1087 // And the arguments passed on the stack
1088 SmallVector<SDValue, 8> MemOpChains;
1090 for (unsigned i = 0; i != NumOps; ++i) {
1091 SDValue Arg = TheCall->getArg(i);
1093 // PtrOff will be used to store the current argument to the stack if a
1094 // register cannot be found for it.
1095 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1096 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// Each visible case below follows the same pattern: use the next argument
// register if available, otherwise store the argument to its stack slot.
1098 switch (Arg.getValueType().getSimpleVT()) {
1099 default: assert(0 && "Unexpected ValueType for argument!");
1103 if (ArgRegIdx != NumArgRegs) {
1104 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1106 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1107 ArgOffset += StackSlotSize;
1112 if (ArgRegIdx != NumArgRegs) {
1113 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1115 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1116 ArgOffset += StackSlotSize;
1123 if (ArgRegIdx != NumArgRegs) {
1124 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1126 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1127 ArgOffset += StackSlotSize;
1133 // Update number of stack bytes actually used, insert a call sequence start
1134 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1135 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1138 if (!MemOpChains.empty()) {
1139 // Adjust the stack pointer for the stack arguments.
1140 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1141 &MemOpChains[0], MemOpChains.size());
1144 // Build a sequence of copy-to-reg nodes chained together with token chain
1145 // and flag operands which copy the outgoing args into the appropriate regs.
1147 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1148 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1150 InFlag = Chain.getValue(1);
1153 SmallVector<SDValue, 8> Ops;
1154 unsigned CallOpc = SPUISD::CALL;
1156 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1157 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1158 // node so that legalize doesn't hack it.
1159 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1160 GlobalValue *GV = G->getGlobal();
1161 MVT CalleeVT = Callee.getValueType();
1162 SDValue Zero = DAG.getConstant(0, PtrVT);
1163 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1165 if (!ST->usingLargeMem()) {
1166 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1167 // style calls, otherwise, external symbols are BRASL calls. This assumes
1168 // that declared/defined symbols are in the same compilation unit and can
1169 // be reached through PC-relative jumps.
1172 // This may be an unsafe assumption for JIT and really large compilation
1174 if (GV->isDeclaration()) {
1175 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1177 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1180 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1182 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1184 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1185 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1186 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1187 // If this is an absolute destination address that appears to be a legal
1188 // local store address, use the munged value.
1189 Callee = SDValue(Dest, 0);
1192 Ops.push_back(Chain);
1193 Ops.push_back(Callee);
1195 // Add argument registers to the end of the list so that they are known live
1197 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1198 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1199 RegsToPass[i].second.getValueType()));
1201 if (InFlag.getNode())
1202 Ops.push_back(InFlag);
1203 // Returns a chain and a flag for retval copy to use.
1204 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1205 &Ops[0], Ops.size());
1206 InFlag = Chain.getValue(1);
1208 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1209 DAG.getIntPtrConstant(0, true), InFlag);
1210 if (TheCall->getValueType(0) != MVT::Other)
1211 InFlag = Chain.getValue(1);
1213 SDValue ResultVals[3];
1214 unsigned NumResults = 0;
1216 // If the call has results, copy the values out of the ret val registers.
1217 switch (TheCall->getValueType(0).getSimpleVT()) {
1218 default: assert(0 && "Unexpected ret value!");
1219 case MVT::Other: break;
// Two-register i32 result: copy R4 first, then R3, chaining the flag
// through so the copies stay ordered.
1221 if (TheCall->getValueType(1) == MVT::i32) {
1222 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1223 ResultVals[0] = Chain.getValue(0);
1224 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1225 Chain.getValue(2)).getValue(1);
1226 ResultVals[1] = Chain.getValue(0);
1229 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1230 ResultVals[0] = Chain.getValue(0);
1235 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1236 ResultVals[0] = Chain.getValue(0);
1241 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1242 InFlag).getValue(1);
1243 ResultVals[0] = Chain.getValue(0);
1251 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1252 InFlag).getValue(1);
1253 ResultVals[0] = Chain.getValue(0);
1258 // If the function returns void, just return the chain.
1259 if (NumResults == 0)
1262 // Otherwise, merge everything together with a MERGE_VALUES node.
1263 ResultVals[NumResults++] = Chain;
1264 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1265 return Res.getValue(Op.getResNo());
// Lower a function return: run the return-value calling convention
// (RetCC_SPU), mark the return registers live-out, copy each return value
// into its assigned register, and emit SPUISD::RET_FLAG (with the glue flag
// when values were copied).
1269 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1270 SmallVector<CCValAssign, 16> RVLocs;
1271 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1272 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1273 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1274 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1276 // If this is the first return lowered for this function, add the regs to the
1277 // liveout set for the function.
1278 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1279 for (unsigned i = 0; i != RVLocs.size(); ++i)
1280 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1283 SDValue Chain = Op.getOperand(0);
1286 // Copy the result values into the output registers.
1287 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1288 CCValAssign &VA = RVLocs[i];
1289 assert(VA.isRegLoc() && "Can only return in registers!");
// Operand i*2+1 is the i-th return value (RET operands alternate
// value/signness pairs after the chain).
1290 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1291 Flag = Chain.getValue(1);
1295 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1297 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1301 //===----------------------------------------------------------------------===//
1302 // Vector related lowering:
1303 //===----------------------------------------------------------------------===//
// If build-vector node N is a splat of a single ConstantSDNode (ignoring
// UNDEF elements), return that constant; return 0 when the elements differ,
// are non-constant, or are all UNDEF.
1305 static ConstantSDNode *
1306 getVecImm(SDNode *N) {
1307 SDValue OpVal(0, 0);
1309 // Check to see if this buildvec has a single non-undef value in its elements.
1310 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1311 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1312 if (OpVal.getNode() == 0)
1313 OpVal = N->getOperand(i);
1314 else if (OpVal != N->getOperand(i))
1318 if (OpVal.getNode() != 0) {
1319 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1324 return 0; // All UNDEF: use implicit def.; not Constant node
1327 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1328 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// For i64 the upper and lower 32-bit halves are compared (comparison lines
// elided here); on a match the value is reduced to its 32-bit half before
// the 18-bit range check.
1330 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1332 if (ConstantSDNode *CN = getVecImm(N)) {
1333 uint64_t Value = CN->getZExtValue();
1334 if (ValueType == MVT::i64) {
1335 uint64_t UValue = CN->getZExtValue();
1336 uint32_t upper = uint32_t(UValue >> 32);
1337 uint32_t lower = uint32_t(UValue);
1340 Value = Value >> 32;
1342 if (Value <= 0x3ffff)
1343 return DAG.getTargetConstant(Value, ValueType);
1349 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1350 /// and the value fits into a signed 16-bit constant, and if so, return the
// Same halving scheme as get_vec_u18imm, but with a signed 16-bit range
// check [-2^15, 2^15-1].
1352 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1354 if (ConstantSDNode *CN = getVecImm(N)) {
1355 int64_t Value = CN->getSExtValue();
1356 if (ValueType == MVT::i64) {
1357 uint64_t UValue = CN->getZExtValue();
1358 uint32_t upper = uint32_t(UValue >> 32);
1359 uint32_t lower = uint32_t(UValue);
1362 Value = Value >> 32;
1364 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1365 return DAG.getTargetConstant(Value, ValueType);
1372 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1373 /// and the value fits into a signed 10-bit constant, and if so, return the
// Same halving scheme as the other get_vec_* helpers; the range check is
// delegated to isS10Constant.
1375 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1377 if (ConstantSDNode *CN = getVecImm(N)) {
1378 int64_t Value = CN->getSExtValue();
1379 if (ValueType == MVT::i64) {
1380 uint64_t UValue = CN->getZExtValue();
1381 uint32_t upper = uint32_t(UValue >> 32);
1382 uint32_t lower = uint32_t(UValue);
1385 Value = Value >> 32;
1387 if (isS10Constant(Value))
1388 return DAG.getTargetConstant(Value, ValueType);
1394 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1395 /// and the value fits into a signed 8-bit constant, and if so, return the
1398 /// @note: The incoming vector is v16i8 because that's the only way we can load
1399 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1401 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1403 if (ConstantSDNode *CN = getVecImm(N)) {
1404 int Value = (int) CN->getZExtValue();
// i16 case: accept only when the high byte equals the low byte, i.e. the
// 16-bit value is itself a byte splat; return the single byte.
1405 if (ValueType == MVT::i16
1406 && Value <= 0xffff /* truncated from uint64_t */
1407 && ((short) Value >> 8) == ((short) Value & 0xff))
1408 return DAG.getTargetConstant(Value & 0xff, ValueType);
1409 else if (ValueType == MVT::i8
1410 && (Value & 0xff) == Value)
1411 return DAG.getTargetConstant(Value, ValueType);
1417 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1418 /// and the value fits into a signed 16-bit constant, and if so, return the
// Matches the ILHU (immediate-load-halfword-upper) pattern: the splat value
// must have all zeros in its low 16 bits; the returned constant is the
// value shifted down by 16.
1420 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1422 if (ConstantSDNode *CN = getVecImm(N)) {
1423 uint64_t Value = CN->getZExtValue();
1424 if ((ValueType == MVT::i32
1425 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1426 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1427 return DAG.getTargetConstant(Value >> 16, ValueType);
1433 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat constant as an i32 target constant, or (on elided
// lines) a null SDValue when N is not a constant splat.
1434 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1435 if (ConstantSDNode *CN = getVecImm(N)) {
1436 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1442 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
// NOTE(review): the (unsigned) cast truncates the splat value to 32 bits
// before building the MVT::i64 constant — confirm whether intentional.
1443 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1444 if (ConstantSDNode *CN = getVecImm(N)) {
1445 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1451 // If this is a vector of constants or undefs, get the bits. A bit in
1452 // UndefBits is set if the corresponding element of the vector is an
1453 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1454 // zero. Return true if this is not an array of constants, false if it is.
1456 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1457 uint64_t UndefBits[2]) {
1458 // Start with zero'd results.
1459 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1461 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1462 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1463 SDValue OpVal = BV->getOperand(i);
// Pack each element's bits into one of two 64-bit words: PartNo selects
// the word, SlotNo the element's position within that word.
1465 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1466 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1468 uint64_t EltBits = 0;
1469 if (OpVal.getOpcode() == ISD::UNDEF) {
1470 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1471 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1473 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1474 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1475 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements contribute their raw IEEE bit pattern.
1476 const APFloat &apf = CN->getValueAPF();
1477 EltBits = (CN->getValueType(0) == MVT::f32
1478 ? FloatToBits(apf.convertToFloat())
1479 : DoubleToBits(apf.convertToDouble()));
1481 // Nonconstant element.
1485 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1488 //printf("%llx %llx %llx %llx\n",
1489 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1493 /// If this is a splat (repetition) of a value across the whole vector, return
1494 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1495 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1496 /// SplatSize = 1 byte.
1497 static bool isConstantSplat(const uint64_t Bits128[2],
1498 const uint64_t Undef128[2],
1500 uint64_t &SplatBits, uint64_t &SplatUndef,
1502 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1503 // the same as the lower 64-bits, ignoring undefs.
// Progressively fold the 128-bit value down to 64/32/16 bits, OR-ing bits
// and AND-ing undef masks so an undef half never blocks a match.
1504 uint64_t Bits64 = Bits128[0] | Bits128[1];
1505 uint64_t Undef64 = Undef128[0] & Undef128[1];
1506 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1507 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1508 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1509 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1511 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1512 if (MinSplatBits < 64) {
1514 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1516 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1517 if (MinSplatBits < 32) {
1519 // If the top 16-bits are different than the lower 16-bits, ignoring
1520 // undefs, we have an i32 splat.
1521 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1522 if (MinSplatBits < 16) {
1523 // If the top 8-bits are different than the lower 8-bits, ignoring
1524 // undefs, we have an i16 splat.
1525 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1526 == ((Bits16 >> 8) & ~Undef16)) {
1527 // Otherwise, we have an 8-bit splat.
1528 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1529 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1535 SplatUndef = Undef16;
1542 SplatUndef = Undef32;
1548 SplatBits = Bits128[0];
1549 SplatUndef = Undef128[0];
1555 return false; // Can't be a splat if two pieces don't match.
1558 // If this is a case we can't handle, return null and let the default
1559 // expansion code take care of it. If we CAN select this case, and if it
1560 // selects to a single instruction, return Op. Otherwise, if we can codegen
1561 // this case more efficiently than a constant pool load, lower it to the
1562 // sequence of ops that should be used.
1563 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1564 MVT VT = Op.getValueType();
1565 // If this is a vector of constants or undefs, get the bits. A bit in
1566 // UndefBits is set if the corresponding element of the vector is an
1567 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1569 uint64_t VectorBits[2];
1570 uint64_t UndefBits[2];
1571 uint64_t SplatBits, SplatUndef;
1573 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1574 || !isConstantSplat(VectorBits, UndefBits,
1575 VT.getVectorElementType().getSizeInBits(),
1576 SplatBits, SplatUndef, SplatSize))
1577 return SDValue(); // Not a constant vector, not a splat.
// Per-type splat lowering; several switch labels and closing braces are
// elided in this excerpt.
1579 switch (VT.getSimpleVT()) {
1582 uint32_t Value32 = SplatBits;
1583 assert(SplatSize == 4
1584 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1585 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1586 SDValue T = DAG.getConstant(Value32, MVT::i32);
1587 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1588 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1592 uint64_t f64val = SplatBits;
1593 assert(SplatSize == 8
1594 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1595 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596 SDValue T = DAG.getConstant(f64val, MVT::i64);
1597 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1598 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1602 // 8-bit constants have to be expanded to 16-bits
1603 unsigned short Value16 = SplatBits | (SplatBits << 8);
1605 for (int i = 0; i < 8; ++i)
1606 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1607 return DAG.getNode(ISD::BIT_CONVERT, VT,
1608 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16 path: widen a 1-byte splat to 16 bits if needed, then splat.
1611 unsigned short Value16;
1613 Value16 = (unsigned short) (SplatBits & 0xffff);
1615 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1616 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1618 for (int i = 0; i < 8; ++i) Ops[i] = T;
1619 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1622 unsigned int Value = SplatBits;
1623 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1624 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 path: if both 32-bit halves match, a simple splat works; otherwise
// build the value out of 32-bit splats combined with a SHUFB byte shuffle.
1627 uint64_t val = SplatBits;
1628 uint32_t upper = uint32_t(val >> 32);
1629 uint32_t lower = uint32_t(val);
1631 if (upper == lower) {
1632 // Magic constant that can be matched by IL, ILA, et. al.
1633 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1634 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1638 SmallVector<SDValue, 16> ShufBytes;
1640 bool upper_special, lower_special;
1642 // NOTE: This code creates common-case shuffle masks that can be easily
1643 // detected as common expressions. It is not attempting to create highly
1644 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1646 // Detect if the upper or lower half is a special shuffle mask pattern:
1647 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1648 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1650 // Create lower vector if not a special pattern
1651 if (!lower_special) {
1652 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1653 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1654 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1655 LO32C, LO32C, LO32C, LO32C));
1658 // Create upper vector if not a special pattern
1659 if (!upper_special) {
1660 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1661 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1662 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1663 HI32C, HI32C, HI32C, HI32C));
1666 // If either upper or lower are special, then the two input operands are
1667 // the same (basically, one of them is a "don't care")
1672 if (lower_special && upper_special) {
1673 // Unhappy situation... both upper and lower are special, so punt with
1674 // a target constant:
1675 SDValue Zero = DAG.getConstant(0, MVT::i32);
1676 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control word: special halves encode 0x00 / 0xff /
// 0x80 generator bytes; normal halves select the source byte directly.
1680 for (int i = 0; i < 4; ++i) {
1682 for (int j = 0; j < 4; ++j) {
1684 bool process_upper, process_lower;
1686 process_upper = (upper_special && (i & 1) == 0);
1687 process_lower = (lower_special && (i & 1) == 1);
1689 if (process_upper || process_lower) {
1690 if ((process_upper && upper == 0)
1691 || (process_lower && lower == 0))
1693 else if ((process_upper && upper == 0xffffffff)
1694 || (process_lower && lower == 0xffffffff))
1696 else if ((process_upper && upper == 0x80000000)
1697 || (process_lower && lower == 0x80000000))
1698 val |= (j == 0 ? 0xe0 : 0x80);
1700 val |= i * 4 + j + ((i & 1) * 16);
1703 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1706 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1707 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1708 &ShufBytes[0], ShufBytes.size()));
1716 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1717 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1718 /// permutation vector, V3, is monotonically increasing with one "exception"
1719 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1720 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1721 /// In either case, the net result is going to eventually invoke SHUFB to
1722 /// permute/shuffle the bytes from V1 and V2.
1724 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1725 /// control word for byte/halfword/word insertion. This takes care of a single
1726 /// element move from V2 into V1.
1728 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1729 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1730 SDValue V1 = Op.getOperand(0);
1731 SDValue V2 = Op.getOperand(1);
1732 SDValue PermMask = Op.getOperand(2);
1734 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1736 // If we have a single element being moved from V1 to V2, this can be handled
1737 // using the C*[DX] compute mask instructions, but the vector elements have
1738 // to be monotonically increasing with one exception element.
1739 MVT EltVT = V1.getValueType().getVectorElementType();
1740 unsigned EltsFromV2 = 0;
1742 unsigned V2EltIdx0 = 0;
1743 unsigned CurrElt = 0;
1744 bool monotonic = true;
// V2EltIdx0 (set on elided lines) is the first mask index that refers to V2
// rather than V1, derived from the element width.
1745 if (EltVT == MVT::i8)
1747 else if (EltVT == MVT::i16)
1749 else if (EltVT == MVT::i32)
1752 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1754 for (unsigned i = 0, e = PermMask.getNumOperands();
1755 EltsFromV2 <= 1 && monotonic && i != e;
1758 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1761 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1763 if (SrcElt >= V2EltIdx0) {
// NOTE(review): SrcElt >= V2EltIdx0 here, so (V2EltIdx0 - SrcElt) is <= 0
// before the shift — confirm this byte-offset encoding is intended.
1765 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1766 } else if (CurrElt != SrcElt) {
1773 if (EltsFromV2 == 1 && monotonic) {
1774 // Compute mask and shuffle
1775 MachineFunction &MF = DAG.getMachineFunction();
1776 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1777 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1778 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1779 // Initialize temporary register to 0
1780 SDValue InitTempReg =
1781 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT))
1782 // Copy register's contents as index in INSERT_MASK:
1783 SDValue ShufMaskOp =
1784 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1785 DAG.getTargetConstant(V2Elt, MVT::i32),
1786 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1787 // Use shuffle mask in SHUFB synthetic instruction:
1788 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1790 // Convert the SHUFFLE_VECTOR mask's input element units to the
1792 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1794 SmallVector<SDValue, 16> ResultMask;
1795 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1797 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1800 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
// Expand each element index into its constituent byte indices for shufb.
1802 for (unsigned j = 0; j < BytesPerElement; ++j) {
1803 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1808 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1809 &ResultMask[0], ResultMask.size());
1810 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: a constant scalar becomes a BUILD_VECTOR of the
// appropriate number of copies; a non-constant scalar is promoted into a
// vector register via SPUISD::PROMOTE_SCALAR.
1814 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1815 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1817 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1818 // For a constant, build the appropriate constant vector, which will
1819 // eventually simplify to a vector register load.
1821 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1822 SmallVector<SDValue, 16> ConstVecValues;
1826 // Create a constant vector:
1827 switch (Op.getValueType().getSimpleVT()) {
1828 default: assert(0 && "Unexpected constant value type in "
1829 "LowerSCALAR_TO_VECTOR");
1830 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1831 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1832 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1833 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1834 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1835 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1838 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1839 for (size_t j = 0; j < n_copies; ++j)
1840 ConstVecValues.push_back(CValue);
1842 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1843 &ConstVecValues[0], ConstVecValues.size());
1845 // Otherwise, copy the value from one register to another:
1846 switch (Op0.getValueType().getSimpleVT()) {
1847 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1854 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
// Lower vector multiply for the types the SPU cannot multiply natively.
// v4i32 is synthesized from MPYH/MPYU partial products; v8i16 uses MPY plus
// a SELB merge of high/low halves; v16i8 is decomposed into sign-extended
// halfword multiplies and reassembled. NOTE(review): several switch labels
// and closing braces are elided in this excerpt.
1861 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1862 switch (Op.getValueType().getSimpleVT()) {
1864 cerr << "CellSPU: Unknown vector multiplication, got "
1865 << Op.getValueType().getMVTString()
// v4i32: full 32-bit product = low partial product (MPYU) plus both
// cross-half high partial products (MPYH in each operand order).
1871 SDValue rA = Op.getOperand(0);
1872 SDValue rB = Op.getOperand(1);
1873 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1874 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1875 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1876 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1878 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1882 // Multiply two v8i16 vectors (pipeline friendly version):
1883 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1884 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1885 // c) Use SELB to select upper and lower halves from the intermediate results
1887 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1888 // dual-issue. This code does manage to do this, even if it's a little on
1891 MachineFunction &MF = DAG.getMachineFunction();
1892 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1893 SDValue Chain = Op.getOperand(0);
1894 SDValue rA = Op.getOperand(0);
1895 SDValue rB = Op.getOperand(1);
// Virtual registers pin the SELECT_MASK and MPYHH results so they can be
// scheduled early (see NOTE above about dual-issue).
1896 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1897 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1900 DAG.getCopyToReg(Chain, FSMBIreg,
1901 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1902 DAG.getConstant(0xcccc, MVT::i16)));
1905 DAG.getCopyToReg(FSMBOp, HiProdReg,
1906 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1908 SDValue HHProd_v4i32 =
1909 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1910 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1912 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1913 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1914 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1915 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1917 DAG.getConstant(16, MVT::i16))),
1918 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1921 // This M00sE is N@stI! (apologies to Monty Python)
1923 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1924 // is to break it all apart, sign extend, and reassemble the various
1925 // intermediate products.
1927 SDValue rA = Op.getOperand(0);
1928 SDValue rB = Op.getOperand(1);
1929 SDValue c8 = DAG.getConstant(8, MVT::i32);
1930 SDValue c16 = DAG.getConstant(16, MVT::i32);
// Low-low byte products via halfword MPY on bit-cast operands.
1933 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1934 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1935 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Arithmetic-shift by 8 sign-extends the high byte of each halfword.
1937 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1939 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1942 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1943 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// 0x2222 mask selects alternating bytes when merging partial products.
1945 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1946 DAG.getConstant(0x2222, MVT::i16));
1948 SDValue LoProdParts =
1949 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1950 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1951 LLProd, LHProd, FSMBmask));
1953 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1956 DAG.getNode(ISD::AND, MVT::v4i32,
1958 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1959 LoProdMask, LoProdMask,
1960 LoProdMask, LoProdMask));
// Repeat the scheme for the upper halfwords of each 32-bit lane.
1963 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1964 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1967 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1968 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1971 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1972 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1976 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1978 DAG.getNode(SPUISD::VEC_SRA,
1979 MVT::v4i32, rAH, c8)),
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1981 DAG.getNode(SPUISD::VEC_SRA,
1982 MVT::v4i32, rBH, c8)));
1985 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1987 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
1991 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
// Final result: OR the low and (shifted) high products back into v16i8.
1993 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
1994 DAG.getNode(ISD::OR, MVT::v4i32,
// Custom-lower f32 (and, per the caller, v4f32) FDIV — SPU has no native FP
// divide. Computes A/B via the SPU reciprocal-estimate sequence:
//   BRcpl   = fi(B, frest(B))                 ; ~ 1/B
//   AxBRcpl = A * BRcpl                       ; first approximation of A/B
//   result  = AxBRcpl + BRcpl*(A - B*AxBRcpl) ; one refinement step
// Intermediate values are pinned in virtual registers (CopyToReg/CopyFromReg)
// so they can be re-read in the refinement expression.
2002 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2003 MachineFunction &MF = DAG.getMachineFunction();
2004 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2006 SDValue A = Op.getOperand(0);
2007 SDValue B = Op.getOperand(1);
2008 MVT VT = Op.getValueType();
2010 unsigned VRegBR, VRegC;
// Scalar f32 uses the 32-bit FP register class; the other branch
// (presumably v4f32 — confirm against the full switch) uses VECREG.
2012 if (VT == MVT::f32) {
2013 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2014 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2016 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2017 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2019 // TODO: make sure we're feeding FPInterp the right arguments
2020 // Right now: fi B, frest(B)
2023 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl ~ 1/B; anchored on the entry-node chain and stored in VRegBR.
2025 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2026 DAG.getNode(SPUISD::FPInterp, VT, B,
2027 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2029 // Computes A * BRcpl and stores in a temporary register
2031 DAG.getCopyToReg(BRcpl, VRegC,
2032 DAG.getNode(ISD::FMUL, VT, A,
2033 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2034 // What's the Chain variable do? It's magic!
2035 // TODO: set Chain = Op(0).getEntryNode()
// Final Newton-style correction: AxBRcpl + BRcpl * (A - B * AxBRcpl).
2037 return DAG.getNode(ISD::FADD, VT,
2038 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2039 DAG.getNode(ISD::FMUL, VT,
2040 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2041 DAG.getNode(ISD::FSUB, VT, A,
2042 DAG.getNode(ISD::FMUL, VT, B,
2043 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
// Custom-lower EXTRACT_VECTOR_ELT for a constant element index.
// Strategy: SPU reads scalars from a vector's "preferred slot", so element 0
// of i32/i64 vectors is free; any other element is rotated into the preferred
// slot with a SHUFB whose mask is built below, then EXTRACT_ELT0 is applied.
2046 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2047 MVT VT = Op.getValueType();
2048 SDValue N = Op.getOperand(0);
2049 SDValue Elt = Op.getOperand(1);
2050 SDValue ShufMask[16];
2051 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
// Only constant indices are supported by this lowering.
2053 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2055 int EltNo = (int) C->getZExtValue();
// Sanity-check the index against the element count for each scalar type.
// NOTE(review): the i32/i64 messages are off by one ("slot > 4"/"slot > 2"
// should read "slot > 3"/"slot > 1" to match the >= checks) — message only.
2058 if (VT == MVT::i8 && EltNo >= 16)
2059 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2060 else if (VT == MVT::i16 && EltNo >= 8)
2061 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2062 else if (VT == MVT::i32 && EltNo >= 4)
2063 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2064 else if (VT == MVT::i64 && EltNo >= 2)
2065 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2067 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2068 // i32 and i64: Element 0 is the preferred slot
2069 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2072 // Need to generate shuffle mask and extract:
2073 int prefslot_begin = -1, prefslot_end = -1;
2074 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Preferred-slot byte range depends on the scalar type (case labels are in
// the full switch; the assignments below correspond to i8/i16/i32/i64).
2076 switch (VT.getSimpleVT()) {
2078 assert(false && "Invalid value type!");
2080 prefslot_begin = prefslot_end = 3;
2084 prefslot_begin = 2; prefslot_end = 3;
2089 prefslot_begin = 0; prefslot_end = 3;
2094 prefslot_begin = 0; prefslot_end = 7;
2099 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2100 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask that moves the requested element's bytes
// into the preferred slot; the remaining bytes repeat the slot pattern.
2102 for (int i = 0; i < 16; ++i) {
2103 // zero fill upper part of preferred slot, don't care about the
2105 unsigned int mask_val;
2107 if (i <= prefslot_end) {
2109 ((i < prefslot_begin)
2111 : elt_byte + (i - prefslot_begin));
2113 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2115 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2118 SDValue ShufMaskVec =
2119 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2121 sizeof(ShufMask) / sizeof(ShufMask[0]));
2123 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2124 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2125 N, N, ShufMaskVec));
// Custom-lower INSERT_VECTOR_ELT with a constant index: promote the scalar
// to a vector, then SHUFB it into place using an INSERT_MASK computed from
// an always-aligned base register ($2) plus the byte offset of the element.
2129 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2130 SDValue VecOp = Op.getOperand(0);
2131 SDValue ValOp = Op.getOperand(1);
2132 SDValue IdxOp = Op.getOperand(2);
2133 MVT VT = Op.getValueType();
// Only constant insert indices are handled by this path.
2135 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2136 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2138 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2139 // Use $2 because it's always 16-byte aligned and it's available:
2140 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// SHUFB(scalar-as-vector, <dest>, insert-mask(base + index)) merges the
// new element into the existing vector (tail of the expression is outside
// this excerpt).
2143 DAG.getNode(SPUISD::SHUFB, VT,
2144 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2146 DAG.getNode(SPUISD::INSERT_MASK, VT,
2147 DAG.getNode(ISD::ADD, PtrVT,
2149 DAG.getConstant(CN->getZExtValue(),
// Custom-lower i8 arithmetic: SPU has no native 8-bit ALU ops, so each
// operand is widened to i16 (sign- or zero-extended per the operation, the
// case labels live in the full switch), the op is done in i16, and the
// result is truncated back to i8. Constant operands are re-materialized as
// i16 constants instead of being wrapped in an extend node.
2155 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2157 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2159 assert(Op.getValueType() == MVT::i8);
2162 assert(0 && "Unhandled i8 math operator");
2166 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2168 SDValue N1 = Op.getOperand(1);
// Sign-extend both operands (subtraction is sign-sensitive).
2169 N0 = (N0.getOpcode() != ISD::Constant
2170 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2171 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2173 N1 = (N1.getOpcode() != ISD::Constant
2174 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2175 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2177 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2178 DAG.getNode(Opc, MVT::i16, N0, N1));
// Rotate-style case: the shift amount (N1) is widened to i32; the value is
// duplicated into both byte halves (OR with SHL 8) so an 8-bit rotate can
// be performed as a 16-bit op — presumably ISD::ROTL; confirm in full file.
2182 SDValue N1 = Op.getOperand(1);
2184 N0 = (N0.getOpcode() != ISD::Constant
2185 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2186 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2188 N1Opc = N1.getValueType().bitsLT(MVT::i32)
2191 N1 = (N1.getOpcode() != ISD::Constant
2192 ? DAG.getNode(N1Opc, MVT::i32, N1)
2193 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2196 DAG.getNode(ISD::OR, MVT::i16, N0,
2197 DAG.getNode(ISD::SHL, MVT::i16,
2198 N0, DAG.getConstant(8, MVT::i32)));
2199 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2200 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical-shift case: zero-extend the value, widen the amount to i16.
2204 SDValue N1 = Op.getOperand(1);
2206 N0 = (N0.getOpcode() != ISD::Constant
2207 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2208 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2210 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2213 N1 = (N1.getOpcode() != ISD::Constant
2214 ? DAG.getNode(N1Opc, MVT::i16, N1)
2215 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2217 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2218 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic-shift case: sign-extend the value before shifting.
2221 SDValue N1 = Op.getOperand(1);
2223 N0 = (N0.getOpcode() != ISD::Constant
2224 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2225 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2227 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2230 N1 = (N1.getOpcode() != ISD::Constant
2231 ? DAG.getNode(N1Opc, MVT::i16, N1)
2232 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2234 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2235 DAG.getNode(Opc, MVT::i16, N0, N1));
// Final case (presumably MUL): sign-extend both operands into i16.
2238 SDValue N1 = Op.getOperand(1);
2240 N0 = (N0.getOpcode() != ISD::Constant
2241 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2242 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2244 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2245 N1 = (N1.getOpcode() != ISD::Constant
2246 ? DAG.getNode(N1Opc, MVT::i16, N1)
2247 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2249 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2250 DAG.getNode(Opc, MVT::i16, N0, N1));
// Custom-lower i64 operations by promoting the scalar into a v2i64 (or
// 128-bit vector of the operand type), performing the operation with SPU
// vector nodes, and extracting element 0 (the preferred slot) back out.
// Handles extends, ADD/SUB (via carry/borrow generate + extended add/sub),
// and the shift/rotate family — case labels are in the full switch.
2258 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2260 MVT VT = Op.getValueType();
2261 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2263 SDValue Op0 = Op.getOperand(0);
2266 case ISD::ZERO_EXTEND:
2267 case ISD::SIGN_EXTEND:
2268 case ISD::ANY_EXTEND: {
2269 MVT Op0VT = Op0.getValueType();
2270 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
// Only i32 -> i64 extension is implemented.
2272 assert(Op0VT == MVT::i32
2273 && "CellSPU: Zero/sign extending something other than i32");
2274 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
// Sign extend rotates in sign bytes; zero/any extend rotates in zeroes.
2276 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2277 ? SPUISD::ROTBYTES_RIGHT_S
2278 : SPUISD::ROTQUAD_RZ_BYTES);
2279 SDValue PromoteScalar =
2280 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
// Rotate the promoted i32 right by 4 bytes so it lands in the low half of
// the preferred i64 slot, filled per NewOpc, then extract element 0.
2282 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2283 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2284 DAG.getNode(NewOpc, Op0VecVT,
2286 DAG.getConstant(4, MVT::i32))));
// --- 64-bit ADD: carry-generate then ADD_EXTENDED with the carry shuffled
// up one 32-bit slot. ---
2290 // Turn operands into vectors to satisfy type checking (shufb works on
2293 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2295 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2296 SmallVector<SDValue, 16> ShufBytes;
2298 // Create the shuffle mask for "rotating" the borrow up one register slot
2299 // once the borrow is generated.
// 0x80 shuffle bytes select the constant 0x00 on SPU shufb.
2300 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2301 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2302 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2303 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2306 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2307 SDValue ShiftedCarry =
2308 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2310 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2311 &ShufBytes[0], ShufBytes.size()));
2313 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2314 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2315 Op0, Op1, ShiftedCarry));
// --- 64-bit SUB: borrow-generate then SUB_EXTENDED, mirroring ADD. ---
2319 // Turn operands into vectors to satisfy type checking (shufb works on
2322 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2324 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2325 SmallVector<SDValue, 16> ShufBytes;
2327 // Create the shuffle mask for "rotating" the borrow up one register slot
2328 // once the borrow is generated.
// 0xc0 shuffle bytes select the constant 0xFF — the fill differs from the
// ADD case; presumably intentional for borrow semantics (verify vs shufb).
2329 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2330 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2331 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2332 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2335 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2336 SDValue ShiftedBorrow =
2337 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2338 BorrowGen, BorrowGen,
2339 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2340 &ShufBytes[0], ShufBytes.size()));
2342 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2343 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2344 Op0, Op1, ShiftedBorrow));
// --- 64-bit SHL: mask the quadword's low half to zero, then shift left by
// whole bytes (amount/8) and residual bits (amount&7). ---
2348 SDValue ShiftAmt = Op.getOperand(1);
2349 MVT ShiftAmtVT = ShiftAmt.getValueType();
2350 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2352 DAG.getNode(SPUISD::SELB, VecVT,
2354 DAG.getConstant(0, VecVT),
2355 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2356 DAG.getConstant(0xff00ULL, MVT::i16)));
2357 SDValue ShiftAmtBytes =
2358 DAG.getNode(ISD::SRL, ShiftAmtVT,
2360 DAG.getConstant(3, ShiftAmtVT));
2361 SDValue ShiftAmtBits =
2362 DAG.getNode(ISD::AND, ShiftAmtVT,
2364 DAG.getConstant(7, ShiftAmtVT));
2366 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2367 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2368 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2369 MaskLower, ShiftAmtBytes),
// --- 64-bit SRL: rotate-right-with-zero-fill by bytes then bits. ---
2374 MVT VT = Op.getValueType();
2375 SDValue ShiftAmt = Op.getOperand(1);
2376 MVT ShiftAmtVT = ShiftAmt.getValueType();
2377 SDValue ShiftAmtBytes =
2378 DAG.getNode(ISD::SRL, ShiftAmtVT,
2380 DAG.getConstant(3, ShiftAmtVT));
2381 SDValue ShiftAmtBits =
2382 DAG.getNode(ISD::AND, ShiftAmtVT,
2384 DAG.getConstant(7, ShiftAmtVT));
2386 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2387 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2388 Op0, ShiftAmtBytes),
// --- 64-bit SRA: implemented as a left-rotate by the negated amount with
// the upper quadword half pre-filled with the sign via SELB. ---
2393 // Promote Op0 to vector
2395 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2396 SDValue ShiftAmt = Op.getOperand(1);
2397 MVT ShiftVT = ShiftAmt.getValueType();
2399 // Negate variable shift amounts
2400 if (!isa<ConstantSDNode>(ShiftAmt)) {
2401 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2402 DAG.getConstant(0, ShiftVT), ShiftAmt);
// Broadcast the sign of the upper 32 bits across the quadword.
2405 SDValue UpperHalfSign =
2406 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2407 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2408 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2409 Op0, DAG.getConstant(31, MVT::i32))));
2410 SDValue UpperHalfSignMask =
2411 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2412 SDValue UpperLowerMask =
2413 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2414 DAG.getConstant(0xff00, MVT::i16));
2415 SDValue UpperLowerSelect =
2416 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2417 UpperHalfSignMask, Op0, UpperLowerMask);
2418 SDValue RotateLeftBytes =
2419 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2420 UpperLowerSelect, ShiftAmt);
2421 SDValue RotateLeftBits =
2422 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2423 RotateLeftBytes, ShiftAmt);
2425 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2433 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of a v16i8 AND/OR/XOR is a splat-constant BUILD_VECTOR
// (possibly behind a BIT_CONVERT, in either operand position), rewrite it as
// a BUILD_VECTOR of 16 identical i8 *target* constants so instruction
// selection can match the byte-immediate forms (andbi/orbi/xorbi).
// Otherwise the node is returned untouched — these ops are already legal.
2435 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2438 MVT VT = Op.getValueType();
// First guess: constant on the left, value on the right.
2440 ConstVec = Op.getOperand(0);
2441 Arg = Op.getOperand(1);
2442 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2443 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2444 ConstVec = ConstVec.getOperand(0);
// Swap: constant on the right, value on the left.
2446 ConstVec = Op.getOperand(1);
2447 Arg = Op.getOperand(0);
2448 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2449 ConstVec = ConstVec.getOperand(0);
2454 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2455 uint64_t VectorBits[2];
2456 uint64_t UndefBits[2];
2457 uint64_t SplatBits, SplatUndef;
// Only rewrite when the vector is a fully-known constant splat.
2460 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2461 && isConstantSplat(VectorBits, UndefBits,
2462 VT.getVectorElementType().getSizeInBits(),
2463 SplatBits, SplatUndef, SplatSize)) {
2465 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2466 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2468 // Turn the BUILD_VECTOR into a set of target constants:
2469 for (size_t i = 0; i < tcVecSize; ++i)
2472 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2473 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2476 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2477 // lowered. Return the operation, rather than a null SDValue.
2481 //! Lower i32 multiplication
// SPU has no full 32x32 multiply; compose it from 16-bit multiplies:
//   rA*rB = mpyh(rA,rB) + mpyh(rB,rA) + mpyu(rA,rB)
// (high-part cross terms plus the unsigned low-halves product).
2482 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
// Unhandled value types fall into the diagnostic arm below.
2484 switch (VT.getSimpleVT()) {
2486 cerr << "CellSPU: Unknown LowerMUL value type, got "
2487 << Op.getValueType().getMVTString()
2493 SDValue rA = Op.getOperand(0);
2494 SDValue rB = Op.getOperand(1);
2496 return DAG.getNode(ISD::ADD, MVT::i32,
2497 DAG.getNode(ISD::ADD, MVT::i32,
2498 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2499 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2500 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2507 //! Custom lowering for CTPOP (count population)
2509 Custom lowering code that counts the number ones in the input
2510 operand. SPU has such an instruction, but it counts the number of
2511 ones per byte, which then have to be accumulated.
// i8: a single CNTB suffices. i16: one extra shift/add folds the two byte
// counts. i32: two shift/add rounds fold the four byte counts, staged
// through virtual registers so intermediate sums can be re-read.
2513 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2514 MVT VT = Op.getValueType();
2515 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2517 switch (VT.getSimpleVT()) {
2519 assert(false && "Invalid value type!");
// --- i8: per-byte count IS the answer; extract element 0. ---
2521 SDValue N = Op.getOperand(0);
2522 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2524 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2525 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2527 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// --- i16: (count >> 8) + count, masked to the low nibble range (0x0f). ---
2531 MachineFunction &MF = DAG.getMachineFunction();
2532 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2534 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2536 SDValue N = Op.getOperand(0);
2537 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2538 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2539 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2541 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2542 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2544 // CNTB_result becomes the chain to which all of the virtual registers
2545 // CNTB_reg, SUM1_reg become associated:
2546 SDValue CNTB_result =
2547 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2549 SDValue CNTB_rescopy =
2550 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2552 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2554 return DAG.getNode(ISD::AND, MVT::i16,
2555 DAG.getNode(ISD::ADD, MVT::i16,
2556 DAG.getNode(ISD::SRL, MVT::i16,
// --- i32: two fold rounds: sum halves by 16, then by 8; mask with 0xff. ---
2563 MachineFunction &MF = DAG.getMachineFunction();
2564 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2566 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2567 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2569 SDValue N = Op.getOperand(0);
2570 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2571 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2572 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2573 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2575 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2576 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2578 // CNTB_result becomes the chain to which all of the virtual registers
2579 // CNTB_reg, SUM1_reg become associated:
2580 SDValue CNTB_result =
2581 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2583 SDValue CNTB_rescopy =
2584 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// First fold: add the upper 16 bits' byte counts into the lower 16.
2587 DAG.getNode(ISD::SRL, MVT::i32,
2588 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2591 DAG.getNode(ISD::ADD, MVT::i32,
2592 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2594 SDValue Sum1_rescopy =
2595 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Second fold: add the remaining two byte counts together.
2598 DAG.getNode(ISD::SRL, MVT::i32,
2599 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2602 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2603 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
// Only the low byte holds the final population count.
2605 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2615 /// LowerOperation - Provide custom lowering hooks for some operations.
// Central dispatch: routes each custom-lowered opcode to its Lower* helper.
// Unhandled opcodes fall into the diagnostic arm (prints the node) below.
2618 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2620 unsigned Opc = (unsigned) Op.getOpcode();
2621 MVT VT = Op.getValueType();
2625 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2626 cerr << "Op.getOpcode() = " << Opc << "\n";
2627 cerr << "*Op.getNode():\n";
2628 Op.getNode()->dump();
2634 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2636 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2637 case ISD::ConstantPool:
2638 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2639 case ISD::GlobalAddress:
2640 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2641 case ISD::JumpTable:
2642 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2644 return LowerConstant(Op, DAG);
2645 case ISD::ConstantFP:
2646 return LowerConstantFP(Op, DAG);
2648 return LowerBRCOND(Op, DAG);
2649 case ISD::FORMAL_ARGUMENTS:
2650 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2652 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2654 return LowerRET(Op, DAG, getTargetMachine());
2657 // i8, i64 math ops:
2658 case ISD::ZERO_EXTEND:
2659 case ISD::SIGN_EXTEND:
2660 case ISD::ANY_EXTEND:
// Scalar math is split by width: i8 promotes through i16, i64 goes through
// the v2i64 vector machinery.
2669 return LowerI8Math(Op, DAG, Opc);
2670 else if (VT == MVT::i64)
2671 return LowerI64Math(Op, DAG, Opc);
2675 // Vector-related lowering.
2676 case ISD::BUILD_VECTOR:
2677 return LowerBUILD_VECTOR(Op, DAG);
2678 case ISD::SCALAR_TO_VECTOR:
2679 return LowerSCALAR_TO_VECTOR(Op, DAG);
2680 case ISD::VECTOR_SHUFFLE:
2681 return LowerVECTOR_SHUFFLE(Op, DAG);
2682 case ISD::EXTRACT_VECTOR_ELT:
2683 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2684 case ISD::INSERT_VECTOR_ELT:
2685 return LowerINSERT_VECTOR_ELT(Op, DAG);
2687 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2691 return LowerByteImmed(Op, DAG);
2693 // Vector and i8 multiply:
2696 return LowerVectorMUL(Op, DAG);
2697 else if (VT == MVT::i8)
2698 return LowerI8Math(Op, DAG, Opc);
2700 return LowerMUL(Op, DAG, VT, Opc);
2703 if (VT == MVT::f32 || VT == MVT::v4f32)
2704 return LowerFDIVf32(Op, DAG);
2705 // else if (Op.getValueType() == MVT::f64)
2706 // return LowerFDIVf64(Op, DAG);
// f64 division is not implemented; anything else here is a hard error.
2708 assert(0 && "Calling FDIV on unsupported MVT");
2711 return LowerCTPOP(Op, DAG);
// Hook for replacing illegal-result nodes after legalization. Currently only
// emits a diagnostic for unhandled opcodes and otherwise leaves nodes alone.
2717 SDNode *SPUTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG)
2720 unsigned Opc = (unsigned) N->getOpcode();
2721 MVT OpVT = N->getValueType(0);
2725 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2726 cerr << "Op.getOpcode() = " << Opc << "\n";
2727 cerr << "*Op.getNode():\n";
2735 /* Otherwise, return unchanged */
2739 //===----------------------------------------------------------------------===//
2740 // Target Optimization Hooks
2741 //===----------------------------------------------------------------------===//
// Target DAG combines: folds constant offsets into SPUindirect addresses,
// drops redundant extends of EXTRACT_ELT0, kills zero-amount vector shifts,
// and cancels PROMOTE_SCALAR/EXTRACT_ELT0 round-trips. Returns the
// replacement node, or an empty SDValue to leave N unchanged.
2744 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2747 TargetMachine &TM = getTargetMachine();
2749 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2750 SelectionDAG &DAG = DCI.DAG;
2751 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2752 SDValue Result; // Initially, NULL result
2754 switch (N->getOpcode()) {
// ISD::ADD: fold constant addends into an adjacent SPUindirect.
2757 SDValue Op1 = N->getOperand(1);
2759 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2760 SDValue Op01 = Op0.getOperand(1);
2761 if (Op01.getOpcode() == ISD::Constant
2762 || Op01.getOpcode() == ISD::TargetConstant) {
2763 // (add <const>, (SPUindirect <arg>, <const>)) ->
2764 // (SPUindirect <arg>, <const + const>)
2765 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2766 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2767 SDValue combinedConst =
2768 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2769 Op0.getValueType());
2771 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2772 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2773 DEBUG(cerr << "With: (SPUindirect <arg>, "
2774 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2775 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2776 Op0.getOperand(0), combinedConst);
// Mirror case: constant on the left, SPUindirect on the right.
2778 } else if (isa<ConstantSDNode>(Op0)
2779 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2780 SDValue Op11 = Op1.getOperand(1);
2781 if (Op11.getOpcode() == ISD::Constant
2782 || Op11.getOpcode() == ISD::TargetConstant) {
2783 // (add (SPUindirect <arg>, <const>), <const>) ->
2784 // (SPUindirect <arg>, <const + const>)
2785 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2786 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2787 SDValue combinedConst =
2788 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2789 Op0.getValueType());
2791 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2792 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2793 DEBUG(cerr << "With: (SPUindirect <arg>, "
2794 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2796 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2797 Op1.getOperand(0), combinedConst);
2802 case ISD::SIGN_EXTEND:
2803 case ISD::ZERO_EXTEND:
2804 case ISD::ANY_EXTEND: {
2805 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2806 N->getValueType(0) == Op0.getValueType()) {
2807 // (any_extend (SPUextract_elt0 <arg>)) ->
2808 // (SPUextract_elt0 <arg>)
2809 // Types must match, however...
2810 DEBUG(cerr << "Replace: ");
2811 DEBUG(N->dump(&DAG));
2812 DEBUG(cerr << "\nWith: ");
2813 DEBUG(Op0.getNode()->dump(&DAG));
2814 DEBUG(cerr << "\n");
2820 case SPUISD::IndirectAddr: {
// Without large memory, an A-form address with zero offset needs no
// indirect wrapper at all.
2821 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2822 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2823 if (CN->getZExtValue() == 0) {
2824 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2825 // (SPUaform <addr>, 0)
2827 DEBUG(cerr << "Replace: ");
2828 DEBUG(N->dump(&DAG));
2829 DEBUG(cerr << "\nWith: ");
2830 DEBUG(Op0.getNode()->dump(&DAG));
2831 DEBUG(cerr << "\n");
2838 case SPUISD::SHLQUAD_L_BITS:
2839 case SPUISD::SHLQUAD_L_BYTES:
2840 case SPUISD::VEC_SHL:
2841 case SPUISD::VEC_SRL:
2842 case SPUISD::VEC_SRA:
2843 case SPUISD::ROTQUAD_RZ_BYTES:
2844 case SPUISD::ROTQUAD_RZ_BITS: {
2845 SDValue Op1 = N->getOperand(1);
2847 if (isa<ConstantSDNode>(Op1)) {
2848 // Kill degenerate vector shifts:
2849 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2851 if (CN->getZExtValue() == 0) {
2857 case SPUISD::PROMOTE_SCALAR: {
2858 switch (Op0.getOpcode()) {
2861 case ISD::ANY_EXTEND:
2862 case ISD::ZERO_EXTEND:
2863 case ISD::SIGN_EXTEND: {
2864 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2866 // but only if the SPUpromote_scalar and <arg> types match.
2867 SDValue Op00 = Op0.getOperand(0);
2868 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2869 SDValue Op000 = Op00.getOperand(0);
2870 if (Op000.getValueType() == N->getValueType(0)) {
2876 case SPUISD::EXTRACT_ELT0: {
2877 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2879 Result = Op0.getOperand(0);
2886 // Otherwise, return unchanged.
// Trace every replacement this combiner makes (debug builds only).
2888 if (Result.getNode()) {
2889 DEBUG(cerr << "\nReplace.SPU: ");
2890 DEBUG(N->dump(&DAG));
2891 DEBUG(cerr << "\nWith: ");
2892 DEBUG(Result.getNode()->dump(&DAG));
2893 DEBUG(cerr << "\n");
2900 //===----------------------------------------------------------------------===//
2901 // Inline Assembly Support
2902 //===----------------------------------------------------------------------===//
2904 /// getConstraintType - Given a constraint letter, return the type of
2905 /// constraint it is for this target.
// Single-letter register-class constraints map to C_RegisterClass; anything
// else defers to the TargetLowering default.
2906 SPUTargetLowering::ConstraintType
2907 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2908 if (ConstraintLetter.size() == 1) {
2909 switch (ConstraintLetter[0]) {
2916 return C_RegisterClass;
2919 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (GCC RS6000-style letters) plus
// the operand's MVT to an SPU register class; falls back to the base class
// for anything unrecognized.
2922 std::pair<unsigned, const TargetRegisterClass*>
2923 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2926 if (Constraint.size() == 1) {
2927 // GCC RS6000 Constraint Letters
2928 switch (Constraint[0]) {
// Integer classes chosen by width (i64 vs narrower).
2932 return std::make_pair(0U, SPU::R64CRegisterClass);
2933 return std::make_pair(0U, SPU::R32CRegisterClass);
// FP classes chosen by width (f32 vs f64).
2936 return std::make_pair(0U, SPU::R32FPRegisterClass);
2937 else if (VT == MVT::f64)
2938 return std::make_pair(0U, SPU::R64FPRegisterClass);
2941 return std::make_pair(0U, SPU::GPRCRegisterClass);
2945 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2948 //! Compute used/known bits for a SPU operand
// Reports which result bits of SPU-specific nodes are known zero/one to the
// DAG combiner. For promote/extract-style nodes, bits outside the original
// scalar's width are marked via the integer-VT bit mask; the listed shift,
// rotate and mask opcodes are grouped at the bottom (their shared handling,
// if any, lies past this excerpt).
2950 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2954 const SelectionDAG &DAG,
2955 unsigned Depth ) const {
2957 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2960 switch (Op.getOpcode()) {
2962 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2972 case SPUISD::PROMOTE_SCALAR: {
// Bits beyond the promoted scalar's own width are determined by its VT mask.
2973 SDValue Op0 = Op.getOperand(0);
2974 MVT Op0VT = Op0.getValueType();
2975 unsigned Op0VTBits = Op0VT.getSizeInBits();
2976 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2977 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2978 KnownOne |= APInt(Op0VTBits, InMask, false);
2982 case SPUISD::LDRESULT:
2983 case SPUISD::EXTRACT_ELT0:
2984 case SPUISD::EXTRACT_ELT0_CHAINED: {
// Same masking logic keyed off the result VT instead of the operand's.
2985 MVT OpVT = Op.getValueType();
2986 unsigned OpVTBits = OpVT.getSizeInBits();
2987 uint64_t InMask = OpVT.getIntegerVTBitMask();
2988 KnownZero |= APInt(OpVTBits, ~InMask, false);
2989 KnownOne |= APInt(OpVTBits, InMask, false);
2994 case EXTRACT_I1_ZEXT:
2995 case EXTRACT_I1_SEXT:
2996 case EXTRACT_I8_ZEXT:
2997 case EXTRACT_I8_SEXT:
3002 case SPUISD::SHLQUAD_L_BITS:
3003 case SPUISD::SHLQUAD_L_BYTES:
3004 case SPUISD::VEC_SHL:
3005 case SPUISD::VEC_SRL:
3006 case SPUISD::VEC_SRA:
3007 case SPUISD::VEC_ROTL:
3008 case SPUISD::VEC_ROTR:
3009 case SPUISD::ROTQUAD_RZ_BYTES:
3010 case SPUISD::ROTQUAD_RZ_BITS:
3011 case SPUISD::ROTBYTES_RIGHT_S:
3012 case SPUISD::ROTBYTES_LEFT:
3013 case SPUISD::ROTBYTES_LEFT_CHAINED:
3014 case SPUISD::SELECT_MASK:
3016 case SPUISD::FPInterp:
3017 case SPUISD::FPRecipEst:
3018 case SPUISD::SEXT32TO64:
3023 // LowerAsmOperandForConstraint
// No SPU-specific inline-asm operand handling yet; delegate entirely to the
// TargetLowering base-class implementation.
3025 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3026 char ConstraintLetter,
3028 std::vector<SDValue> &Ops,
3029 SelectionDAG &DAG) const {
3030 // Default, for the time being, to the base class handler
3031 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3035 /// isLegalAddressImmediate - Return true if the integer value can be used
3036 /// as the offset of the target addressing mode.
// Accepts the open interval (-2^18, 2^18 - 1) — the SPU d-form 18-bit
// signed displacement range (note both bounds are exclusive as written).
3037 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3038 const Type *Ty) const {
3039 // SPU's addresses are 256K:
3040 return (V > -(1 << 18) && V < (1 << 18) - 1);
3043 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3048 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3049 // The SPU target isn't yet aware of offsets.