//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
std::map<unsigned, const char *> node_names;

//! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
  const int prefslot_byte;

const valtype_map_s valtype_map[] = {

const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  const valtype_map_s *retval = 0;

  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;

  cerr << "getValueTypeMapEntry returns NULL for "
//! Predicate that returns true if operand is a memory target
/*!
 \arg Op Operand to test
 \return true if the operand is a memory target (i.e., global
 address, external symbol, constant pool) or an A-form address.
 */
bool isMemoryOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::GlobalAddress
          || Opc == ISD::GlobalTLSAddress
          || Opc == ISD::JumpTable
          || Opc == ISD::ConstantPool
          || Opc == ISD::ExternalSymbol
          || Opc == ISD::TargetGlobalAddress
          || Opc == ISD::TargetGlobalTLSAddress
          || Opc == ISD::TargetJumpTable
          || Opc == ISD::TargetConstantPool
          || Opc == ISD::TargetExternalSymbol
          || Opc == SPUISD::AFormAddr);

//! Predicate that returns true if the operand is an indirect target
bool isIndirectOperand(const SDValue &Op)
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::Register
          || Opc == SPUISD::LDRESULT);
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8, MVT::i1, Custom);
  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8, MVT::i8, Custom);
  setTruncStoreAction(MVT::i16, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i64, MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  // No hardware square root either; expand FSQRT as well.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);
  // SPU does not have BSWAP. It does have CTLZ support for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
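  // (Illustrative note: this is the SPU "selb" bitwise select, where
  // selb rt, ra, rb, rc computes rt = (ra & ~rc) | (rb & rc); rc chooses,
  // bit by bit, whether the result comes from ra or rb.)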
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Promote);
  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  // Zero extension and sign extension for i64 have to be custom legalized.
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND , VT, Legal);
    setOperationAction(ISD::OR , VT, Legal);
    setOperationAction(ISD::XOR , VT, Legal);
    setOperationAction(ISD::LOAD , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
      = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Aligned load common code for CellSPU
/*!
 \param[in] Op The SelectionDAG load or store operand
 \param[in] DAG The selection DAG
 \param[in] ST CellSPU subtarget information structure
 \param[in,out] alignment Caller initializes this to the load or store node's
 value from getAlignment(), may be updated while generating the aligned load
 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 offset (divisible by 16, modulo 16 == 0)
 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 offset of the preferred slot (modulo 16 != 0)
 \param[in,out] VT Caller initializes this value type to the load or store
 node's loaded or stored value type; may be updated if an i1-extended load or
 store is encountered
 \param[out] was16aligned true if the base pointer had 16-byte alignment,
 otherwise false. Can help to determine if the chunk needs to be rotated.

 Both load and store lowering load a block of data aligned on a 16-byte
 boundary. This is the common aligned load code shared between both.
 */
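// Illustrative example (assuming the i32 preferred-slot byte offset in
// valtype_map is 0): for an i32 load from (base + 0x1c) with a 16-byte
// aligned base, alignOffs becomes 0x1c, the base pointer is realigned to
// (base + 0x10), and prefSlotOffs becomes 0xc, which the caller later uses
// as the byte rotation amount that brings the word into the preferred slot.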
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();

    prefSlotOffs = -vtm->prefslot_byte;
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);

    prefSlotOffs = -vtm->prefslot_byte;

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));

    // Emit the vector load:
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));

  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));

  was16aligned = false;

  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
 */
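// For example (illustrative), an i32 that sits at byte offset 8 of its
// quadword is handled by loading the whole v16i8 chunk and rotating it left
// by 8 bytes (SPUISD::ROTBYTES_LEFT_CHAINED below) so that the word lands in
// the preferred slot, after which EXTRACT_ELT0_CHAINED pulls out the scalar.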
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.getNode()->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);

    if (result.getNode() == 0)

    the_chain = result.getValue(1);

    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

        Ops[2] = DAG.getConstant(rotamt, MVT::i16);

        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
        vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);

      // Handle the sign and zero-extending loads for i1 and i8:
      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);

      result = DAG.getNode(NewOpC, OpVT, result);

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDValue retops[2] = {

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));

  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
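// In other words, a scalar store becomes a read-modify-write of the enclosing
// quadword: load the aligned v16i8 chunk, build an insertion control word
// (SPUISD::INSERT_MASK, selected as one of the C*D instructions), SHUFB the
// new value into its slot, and store the whole chunk back.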
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT vecVT, stVecVT = MVT::v16i8;

      stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
    vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();

        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended quantities
      theValue = theValue.getOperand(0);

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);

    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);

    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
    cerr << (unsigned) SN->getAddressingMode() << "\n";
/// Generate the address of a constant pool entry.
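// In the small memory model the address is assumed to fit into the A-form
// instruction's immediate field, so a single SPUISD::AFormAddr suffices; in
// "large memory" mode the address is instead split into Hi/Lo halves and
// combined through SPUISD::IndirectAddr (see the usingLargeMem() tests below).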
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

         "LowerConstantPool: Relocation model other than static"

LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

         "LowerJumpTable: Relocation model other than static not supported.");
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);

    cerr << "LowerGlobalAddress: Relocation model other than static not "
//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
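// Concretely, the i64 constant is splatted into a v2i64 BUILD_VECTOR (which
// the BUILD_VECTOR lowering below knows how to materialize) and the scalar
// is then recovered with SPUISD::EXTRACT_ELT0.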
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());

  if (VT == MVT::i64) {
    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

    cerr << "LowerConstant: unhandled constant type "

//! Custom lower double precision floating point constants
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();

  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),

  return SDValue();   // Unchanged
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 8> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    bool needsLoad = false;
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;

    switch (ObjectVT.getSimpleVT()) {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << ObjectVT.getMVTString()

      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);

      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);

      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);

      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);

      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);

      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);

      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;

    ArgValues.push_back(ArgVal);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDValue, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);

    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as an LSA address.
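// Illustrative example: an absolute destination of 0x20 has its low two bits
// clear and sign-extends from 18 bits, so it is returned as the word-index
// immediate 0x8 (0x20 >> 2); anything that fails those checks falls through
// to the normal call lowering.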
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||        // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  bool isVarArg = TheCall->isVarArg();
  bool isTailCall = TheCall->isTailCall();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;

      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;

      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
    InFlag = Chain.getValue(1);

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls; otherwise, external symbols become BRASL calls. This
      // assumes that declared/defined symbols are in the same compilation unit
      // and can be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      // "Large memory" mode: Turn all calls into indirect calls with an X-form
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumStackBytes, PtrVT),
                             DAG.getConstant(0, PtrVT),
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);

      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);

    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);

    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);

    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);

  // If the function returns void, just return the chain.
  if (NumResults == 0)

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());

  SDValue Chain = Op.getOperand(0);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);

    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {

  return 0; // All UNDEF: use implicit def.; not Constant node
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      Value = Value >> 32;

    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      Value = Value >> 32;

    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getConstant(Value, ValueType);
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      Value = Value >> 32;

    if (isS10Constant(Value))
      return DAG.getConstant(Value, ValueType);
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i64);

// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero. Return true if this is not an array of constants, false if it is.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;              // In the upper 128 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
      // Nonconstant element.

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it. For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            uint64_t &SplatBits, uint64_t &SplatUndef,
  // Don't let undefs prevent splats from matching. See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64 = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {
      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {
          // If the top 16-bits are different than the lower 16-bits, ignoring
          // undefs, we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are different than the lower 8-bits, ignoring
              // undefs, we have an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);

            SplatUndef = Undef16;

        SplatUndef = Undef32;

    SplatBits = Bits128[0];
    SplatUndef = Undef128[0];

  return false;  // Can't be a splat if two pieces don't match.

// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits. A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;

  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));

    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits | (SplatBits << 8);
    for (int i = 0; i < 8; ++i)
      Ops[i] = DAG.getConstant(Value16, MVT::i16);
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));

    unsigned short Value16;
      Value16 = (unsigned short) (SplatBits & 0xffff);
      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
    for (int i = 0; i < 8; ++i) Ops[i] = T;
    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);

    unsigned int Value = SplatBits;
    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);

    uint64_t val = SplatBits;
    uint32_t upper = uint32_t(val >> 32);
    uint32_t lower = uint32_t(val);

    if (upper == lower) {
      // Magic constant that can be matched by IL, ILA, et al.
      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);

    SmallVector<SDValue, 16> ShufBytes;

    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
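    // ("Special" means the 32-bit half is 0x00000000, 0xffffffff or
    // 0x80000000, all of which shufb can synthesize through its control-byte
    // encodings: 10xxxxxx selects 0x00, 110xxxxx selects 0xff and 111xxxxx
    // selects 0x80, so no source vector needs to be materialized for it.)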
    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")

    if (lower_special && upper_special) {
      // Unhappy situation... both upper and lower are special, so punt with
      // a target constant:
      SDValue Zero = DAG.getConstant(0, MVT::i32);
      HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,

    for (int i = 0; i < 4; ++i) {
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;

        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
          val |= i * 4 + j + ((i & 1) * 16);

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));

    return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
1756 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1757 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1758 /// permutation vector, V3, is monotonically increasing with one "exception"
1759 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1760 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1761 /// In either case, the net result is going to eventually invoke SHUFB to
1762 /// permute/shuffle the bytes from V1 and V2.
1764 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1765 /// generate the control word for byte/halfword/word insertion. This takes care
1766 /// of a single element move from V2 into V1.
1768 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
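/// For a v4i32 shuffle, mask elements 0-3 index V1 and 4-7 index V2, so a
/// permutation mask such as (0, 1, 5, 3) is the monotonic single-insertion
/// case: element 5 (V2's element 1) replaces V1's element 2.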
1769 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1770 SDValue V1 = Op.getOperand(0);
1771 SDValue V2 = Op.getOperand(1);
1772 SDValue PermMask = Op.getOperand(2);
1774 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1776 // If we have a single element being moved from V2 into V1, this can be handled
1777 // using the C*[DX] compute mask instructions, but the vector elements have
1778 // to be monotonically increasing with one exception element.
1779 MVT EltVT = V1.getValueType().getVectorElementType();
1780 unsigned EltsFromV2 = 0;
1782 unsigned V2EltIdx0 = 0;
1783 unsigned CurrElt = 0;
1784 bool monotonic = true;
1785 if (EltVT == MVT::i8)
1787 else if (EltVT == MVT::i16)
1789 else if (EltVT == MVT::i32)
1792 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1794 for (unsigned i = 0, e = PermMask.getNumOperands();
1795 EltsFromV2 <= 1 && monotonic && i != e;
1798 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1801 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1803 if (SrcElt >= V2EltIdx0) {
1805 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1806 } else if (CurrElt != SrcElt) {
1813 if (EltsFromV2 == 1 && monotonic) {
1814 // Compute mask and shuffle
1815 MachineFunction &MF = DAG.getMachineFunction();
1816 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1817 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1818 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1819 // Initialize temporary register to 0
1820 SDValue InitTempReg =
1821 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1822 // Copy register's contents as index in INSERT_MASK:
1823 SDValue ShufMaskOp =
1824 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1825 DAG.getTargetConstant(V2Elt, MVT::i32),
1826 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1827 // Use shuffle mask in SHUFB synthetic instruction:
1828 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1830 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1832 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1834 SmallVector<SDValue, 16> ResultMask;
1835 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1837 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1840 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1842 for (unsigned j = 0; j < BytesPerElement; ++j) {
1843 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1848 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1849 &ResultMask[0], ResultMask.size());
1850 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1854 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1855 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1857 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1858 // For a constant, build the appropriate constant vector, which will
1859 // eventually simplify to a vector register load.
1861 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1862 SmallVector<SDValue, 16> ConstVecValues;
1866 // Create a constant vector:
1867 switch (Op.getValueType().getSimpleVT()) {
1868 default: assert(0 && "Unexpected constant value type in "
1869 "LowerSCALAR_TO_VECTOR");
1870 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1871 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1872 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1873 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1874 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1875 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1878 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1879 for (size_t j = 0; j < n_copies; ++j)
1880 ConstVecValues.push_back(CValue);
1882 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1883 &ConstVecValues[0], ConstVecValues.size());
1885 // Otherwise, copy the value from one register to another:
1886 switch (Op0.getValueType().getSimpleVT()) {
1887 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1894 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1901 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1902 switch (Op.getValueType().getSimpleVT()) {
1904 cerr << "CellSPU: Unknown vector multiplication, got "
1905 << Op.getValueType().getMVTString()
1911 SDValue rA = Op.getOperand(0);
1912 SDValue rB = Op.getOperand(1);
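// Per 32-bit lane: rA*rB = mpyh(rA,rB) + mpyh(rB,rA) + mpyu(rA,rB); the two
// MPYH cross terms already arrive shifted into the high halfword, MPYU is the
// unsigned low-halfword product.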
1913 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1914 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1915 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1916 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1918 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1922 // Multiply two v8i16 vectors (pipeline friendly version):
1923 // a) multiply lower halves, mask off the upper 16 bits of the 32-bit product
1924 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1925 // c) Use SELB to select upper and lower halves from the intermediate results
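// Roughly: MPY yields 32-bit products of the odd (low) halfword lanes, whose
// low 16 bits are already the odd-lane results in place; MPYHH does the same
// for the even (high) halfword lanes, and the shift left by 16 moves those
// results up into the even positions. SELB, driven by the 0xcccc FSMBI mask,
// then interleaves the two into the final eight 16-bit products.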
1927 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1928 // dual-issue. This code does manage to do this, even if it's a little on
1931 MachineFunction &MF = DAG.getMachineFunction();
1932 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1933 SDValue Chain = Op.getOperand(0);
1934 SDValue rA = Op.getOperand(0);
1935 SDValue rB = Op.getOperand(1);
1936 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1937 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1940 DAG.getCopyToReg(Chain, FSMBIreg,
1941 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1942 DAG.getConstant(0xcccc, MVT::i16)));
1945 DAG.getCopyToReg(FSMBOp, HiProdReg,
1946 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1948 SDValue HHProd_v4i32 =
1949 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1950 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1952 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1953 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1954 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1955 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1957 DAG.getConstant(16, MVT::i16))),
1958 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1961 // This M00sE is N@stI! (apologies to Monty Python)
1963 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1964 // is to break it all apart, sign extend, and reassemble the various
1965 // intermediate products.
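// The saving grace: only the low 8 bits of each lane's product are needed, so
// each byte pair can be widened to 16 bits, run through the halfword MPY
// forms, and the low byte of each 16-bit product selected back into its lane.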
1967 SDValue rA = Op.getOperand(0);
1968 SDValue rB = Op.getOperand(1);
1969 SDValue c8 = DAG.getConstant(8, MVT::i32);
1970 SDValue c16 = DAG.getConstant(16, MVT::i32);
1973 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1974 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1975 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1977 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1979 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1982 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1983 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1985 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1986 DAG.getConstant(0x2222, MVT::i16));
1988 SDValue LoProdParts =
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1990 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1991 LLProd, LHProd, FSMBmask));
1993 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1996 DAG.getNode(ISD::AND, MVT::v4i32,
1998 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1999 LoProdMask, LoProdMask,
2000 LoProdMask, LoProdMask));
2003 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2004 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2007 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2008 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2011 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2012 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2013 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2016 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2017 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2018 DAG.getNode(SPUISD::VEC_SRA,
2019 MVT::v4i32, rAH, c8)),
2020 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2021 DAG.getNode(SPUISD::VEC_SRA,
2022 MVT::v4i32, rBH, c8)));
2025 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2027 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2031 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2033 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2034 DAG.getNode(ISD::OR, MVT::v4i32,
2042 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2043 MachineFunction &MF = DAG.getMachineFunction();
2044 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2046 SDValue A = Op.getOperand(0);
2047 SDValue B = Op.getOperand(1);
2048 MVT VT = Op.getValueType();
2050 unsigned VRegBR, VRegC;
2052 if (VT == MVT::f32) {
2053 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2054 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2056 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2057 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2059 // TODO: make sure we're feeding FPInterp the right arguments
2060 // Right now: fi B, frest(B)
2063 // (Floating Interpolate (FP Reciprocal Estimate B))
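// In outline, the nodes below compute:
//   BRcpl   = fi(B, frest(B))                (refined estimate of 1/B)
//   AxBRcpl = A * BRcpl                       (first approximation of A/B)
//   result  = AxBRcpl + BRcpl * (A - B * AxBRcpl)
// i.e. one Newton-Raphson style correction applied to the initial estimate.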
2065 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2066 DAG.getNode(SPUISD::FPInterp, VT, B,
2067 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2069 // Computes A * BRcpl and stores in a temporary register
2071 DAG.getCopyToReg(BRcpl, VRegC,
2072 DAG.getNode(ISD::FMUL, VT, A,
2073 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2074 // What does the Chain variable do? It's magic!
2075 // TODO: set Chain = Op(0).getEntryNode()
2077 return DAG.getNode(ISD::FADD, VT,
2078 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2079 DAG.getNode(ISD::FMUL, VT,
2080 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2081 DAG.getNode(ISD::FSUB, VT, A,
2082 DAG.getNode(ISD::FMUL, VT, B,
2083 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2086 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2087 MVT VT = Op.getValueType();
2088 SDValue N = Op.getOperand(0);
2089 SDValue Elt = Op.getOperand(1);
2090 SDValue ShufMask[16];
2091 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2093 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2095 int EltNo = (int) C->getZExtValue();
2098 if (VT == MVT::i8 && EltNo >= 16)
2099 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2100 else if (VT == MVT::i16 && EltNo >= 8)
2101 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2102 else if (VT == MVT::i32 && EltNo >= 4)
2103 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2104 else if (VT == MVT::i64 && EltNo >= 2)
2105 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2107 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2108 // i32 and i64: Element 0 is the preferred slot
2109 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2112 // Need to generate shuffle mask and extract:
2113 int prefslot_begin = -1, prefslot_end = -1;
2114 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2116 switch (VT.getSimpleVT()) {
2118 assert(false && "Invalid value type!");
2120 prefslot_begin = prefslot_end = 3;
2124 prefslot_begin = 2; prefslot_end = 3;
2128 prefslot_begin = 0; prefslot_end = 3;
2132 prefslot_begin = 0; prefslot_end = 7;
2137 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2138 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
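// Example: extracting element 2 of a v4i32 gives elt_byte = 8, so the first
// four mask bytes become 8, 9, 10, 11 and the SHUFB below rotates that word
// into the preferred slot (bytes 0-3), where EXTRACT_ELT0 reads it.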
2140 for (int i = 0; i < 16; ++i) {
2141 // zero fill upper part of preferred slot, don't care about the
2143 unsigned int mask_val;
2145 if (i <= prefslot_end) {
2147 ((i < prefslot_begin)
2149 : elt_byte + (i - prefslot_begin));
2151 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2153 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2156 SDValue ShufMaskVec =
2157 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2159 sizeof(ShufMask) / sizeof(ShufMask[0]));
2161 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2162 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2163 N, N, ShufMaskVec));
2167 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2168 SDValue VecOp = Op.getOperand(0);
2169 SDValue ValOp = Op.getOperand(1);
2170 SDValue IdxOp = Op.getOperand(2);
2171 MVT VT = Op.getValueType();
2173 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2174 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2176 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2177 // Use $2 because it's always 16-byte aligned and it's available:
2178 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2181 DAG.getNode(SPUISD::SHUFB, VT,
2182 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2184 DAG.getNode(SPUISD::INSERT_MASK, VT,
2185 DAG.getNode(ISD::ADD, PtrVT,
2187 DAG.getConstant(CN->getZExtValue(),
2193 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2195 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2197 assert(Op.getValueType() == MVT::i8);
2200 assert(0 && "Unhandled i8 math operator");
2204 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2206 SDValue N1 = Op.getOperand(1);
2207 N0 = (N0.getOpcode() != ISD::Constant
2208 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2209 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2211 N1 = (N1.getOpcode() != ISD::Constant
2212 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2213 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2215 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2216 DAG.getNode(Opc, MVT::i16, N0, N1));
2220 SDValue N1 = Op.getOperand(1);
2222 N0 = (N0.getOpcode() != ISD::Constant
2223 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2224 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2226 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2229 N1 = (N1.getOpcode() != ISD::Constant
2230 ? DAG.getNode(N1Opc, MVT::i16, N1)
2231 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2234 DAG.getNode(ISD::OR, MVT::i16, N0,
2235 DAG.getNode(ISD::SHL, MVT::i16,
2236 N0, DAG.getConstant(8, MVT::i16)));
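// Duplicating the byte into both halves of the 16-bit value makes the 16-bit
// rotate match the 8-bit rotate in its low byte, e.g. rotl8(0xAB, 4) = 0xBA
// and rotl16(0xABAB, 4) = 0xBABA, whose low byte is also 0xBA.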
2237 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2238 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2242 SDValue N1 = Op.getOperand(1);
2244 N0 = (N0.getOpcode() != ISD::Constant
2245 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2246 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2248 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2251 N1 = (N1.getOpcode() != ISD::Constant
2252 ? DAG.getNode(N1Opc, MVT::i16, N1)
2253 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2255 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2256 DAG.getNode(Opc, MVT::i16, N0, N1));
2259 SDValue N1 = Op.getOperand(1);
2261 N0 = (N0.getOpcode() != ISD::Constant
2262 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2263 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2265 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2268 N1 = (N1.getOpcode() != ISD::Constant
2269 ? DAG.getNode(N1Opc, MVT::i16, N1)
2270 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2272 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2273 DAG.getNode(Opc, MVT::i16, N0, N1));
2276 SDValue N1 = Op.getOperand(1);
2278 N0 = (N0.getOpcode() != ISD::Constant
2279 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2280 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2282 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2283 N1 = (N1.getOpcode() != ISD::Constant
2284 ? DAG.getNode(N1Opc, MVT::i16, N1)
2285 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2287 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2288 DAG.getNode(Opc, MVT::i16, N0, N1));
2296 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2298 MVT VT = Op.getValueType();
2299 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2301 SDValue Op0 = Op.getOperand(0);
2304 case ISD::ZERO_EXTEND:
2305 case ISD::SIGN_EXTEND:
2306 case ISD::ANY_EXTEND: {
2307 MVT Op0VT = Op0.getValueType();
2308 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2310 assert(Op0VT == MVT::i32
2311 && "CellSPU: Zero/sign extending something other than i32");
2312 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2314 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2315 ? SPUISD::ROTBYTES_RIGHT_S
2316 : SPUISD::ROTQUAD_RZ_BYTES);
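// The i32 source sits in the preferred slot (bytes 0-3); shifting the whole
// quadword right by 4 bytes moves it into bytes 4-7, with zero bytes (or, for
// SIGN_EXTEND, sign-replicated bytes) supplied above it, which is the 64-bit
// extension sitting in element 0 of the v2i64.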
2317 SDValue PromoteScalar =
2318 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2320 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2321 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2322 DAG.getNode(NewOpc, Op0VecVT,
2324 DAG.getConstant(4, MVT::i32))));
2328 // Turn operands into vectors to satisfy type checking (shufb works on
2331 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2333 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2334 SmallVector<SDValue, 16> ShufBytes;
2336 // Create the shuffle mask for "rotating" the carry up one register slot
2337 // once the carry is generated.
2338 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2339 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2340 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2341 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
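// Selector bytes 0x04050607 move the carry produced in word 1 (the low half
// of the doubleword) up into word 0, 0x0c0d0e0f does the same for words
// 3 -> 2, and 0x80 selectors produce zero bytes, so ADD_EXTENDED sees each
// carry aligned under the high word it must be added into.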
2344 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2345 SDValue ShiftedCarry =
2346 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2348 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2349 &ShufBytes[0], ShufBytes.size()));
2351 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2352 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2353 Op0, Op1, ShiftedCarry));
2357 // Turn operands into vectors to satisfy type checking (shufb works on
2360 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2362 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2363 SmallVector<SDValue, 16> ShufBytes;
2365 // Create the shuffle mask for "rotating" the borrow up one register slot
2366 // once the borrow is generated.
2367 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2368 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2369 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2370 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2373 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2374 SDValue ShiftedBorrow =
2375 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2376 BorrowGen, BorrowGen,
2377 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2378 &ShufBytes[0], ShufBytes.size()));
2380 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2381 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2382 Op0, Op1, ShiftedBorrow));
2386 SDValue ShiftAmt = Op.getOperand(1);
2387 MVT ShiftAmtVT = ShiftAmt.getValueType();
2388 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2390 DAG.getNode(SPUISD::SELB, VecVT,
2392 DAG.getConstant(0, VecVT),
2393 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2394 DAG.getConstant(0xff00ULL, MVT::i16)));
2395 SDValue ShiftAmtBytes =
2396 DAG.getNode(ISD::SRL, ShiftAmtVT,
2398 DAG.getConstant(3, ShiftAmtVT));
2399 SDValue ShiftAmtBits =
2400 DAG.getNode(ISD::AND, ShiftAmtVT,
2402 DAG.getConstant(7, ShiftAmtVT));
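// Split the 64-bit shift amount into a byte count and a residual bit count:
// ShiftAmt >> 3 bytes followed by ShiftAmt & 7 bits, e.g. a shift by 35
// becomes a 4-byte quadword shift plus a 3-bit shift.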
2404 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2405 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2406 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2407 MaskLower, ShiftAmtBytes),
2412 MVT VT = Op.getValueType();
2413 SDValue ShiftAmt = Op.getOperand(1);
2414 MVT ShiftAmtVT = ShiftAmt.getValueType();
2415 SDValue ShiftAmtBytes =
2416 DAG.getNode(ISD::SRL, ShiftAmtVT,
2418 DAG.getConstant(3, ShiftAmtVT));
2419 SDValue ShiftAmtBits =
2420 DAG.getNode(ISD::AND, ShiftAmtVT,
2422 DAG.getConstant(7, ShiftAmtVT));
2424 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2425 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2426 Op0, ShiftAmtBytes),
2431 // Promote Op0 to vector
2433 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2434 SDValue ShiftAmt = Op.getOperand(1);
2435 MVT ShiftVT = ShiftAmt.getValueType();
2437 // Negate variable shift amounts
2438 if (!isa<ConstantSDNode>(ShiftAmt)) {
2439 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2440 DAG.getConstant(0, ShiftVT), ShiftAmt);
2443 SDValue UpperHalfSign =
2444 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2445 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2446 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2447 Op0, DAG.getConstant(31, MVT::i32))));
2448 SDValue UpperHalfSignMask =
2449 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2450 SDValue UpperLowerMask =
2451 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2452 DAG.getConstant(0xff00, MVT::i16));
2453 SDValue UpperLowerSelect =
2454 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2455 UpperHalfSignMask, Op0, UpperLowerMask);
2456 SDValue RotateLeftBytes =
2457 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2458 UpperLowerSelect, ShiftAmt);
2459 SDValue RotateLeftBits =
2460 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2461 RotateLeftBytes, ShiftAmt);
2463 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2471 //! Lower byte immediate operations for v16i8 vectors:
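//! e.g. (or v16i8 x, (splat 0x0f)) can become ORBI x, 0x0f once the splatted
//! BUILD_VECTOR operand is rewritten into per-byte target constants.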
2473 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2476 MVT VT = Op.getValueType();
2478 ConstVec = Op.getOperand(0);
2479 Arg = Op.getOperand(1);
2480 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2481 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2482 ConstVec = ConstVec.getOperand(0);
2484 ConstVec = Op.getOperand(1);
2485 Arg = Op.getOperand(0);
2486 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2487 ConstVec = ConstVec.getOperand(0);
2492 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2493 uint64_t VectorBits[2];
2494 uint64_t UndefBits[2];
2495 uint64_t SplatBits, SplatUndef;
2498 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2499 && isConstantSplat(VectorBits, UndefBits,
2500 VT.getVectorElementType().getSizeInBits(),
2501 SplatBits, SplatUndef, SplatSize)) {
2503 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2504 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2506 // Turn the BUILD_VECTOR into a set of target constants:
2507 for (size_t i = 0; i < tcVecSize; ++i)
2510 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2511 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2514 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2515 // lowered. Return the operation, rather than a null SDValue.
2519 //! Lower i32 multiplication
2520 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2522 switch (VT.getSimpleVT()) {
2524 cerr << "CellSPU: Unknown LowerMUL value type, got "
2525 << Op.getValueType().getMVTString()
2531 SDValue rA = Op.getOperand(0);
2532 SDValue rB = Op.getOperand(1);
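// Reassemble the 32-bit product from 16x16-bit partial products:
//   a*b = ((hi(a)*lo(b)) + (hi(b)*lo(a))) << 16  +  lo(a)*lo(b)
// MPYH supplies the two cross terms already shifted into the high halfword,
// and MPYU supplies the unsigned low-halfword product.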
2534 return DAG.getNode(ISD::ADD, MVT::i32,
2535 DAG.getNode(ISD::ADD, MVT::i32,
2536 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2537 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2538 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2545 //! Custom lowering for CTPOP (count population)
2547 Custom lowering code that counts the number of ones in the input
2548 operand. SPU has such an instruction, but it counts the number of
2549 ones per byte, which then have to be accumulated.
2551 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2552 MVT VT = Op.getValueType();
2553 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2555 switch (VT.getSimpleVT()) {
2557 assert(false && "Invalid value type!");
2559 SDValue N = Op.getOperand(0);
2560 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2562 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2563 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2565 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2569 MachineFunction &MF = DAG.getMachineFunction();
2570 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2572 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2574 SDValue N = Op.getOperand(0);
2575 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2576 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2577 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2579 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2580 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2582 // CNTB_result becomes the chain to which the virtual register CNTB_reg
2583 // becomes associated:
2584 SDValue CNTB_result =
2585 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2587 SDValue CNTB_rescopy =
2588 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2590 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2592 return DAG.getNode(ISD::AND, MVT::i16,
2593 DAG.getNode(ISD::ADD, MVT::i16,
2594 DAG.getNode(ISD::SRL, MVT::i16,
2601 MachineFunction &MF = DAG.getMachineFunction();
2602 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2604 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2605 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2607 SDValue N = Op.getOperand(0);
2608 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2609 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2610 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2611 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2613 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2614 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2616 // CNTB_result becomes the chain to which all of the virtual registers
2617 // CNTB_reg, SUM1_reg become associated:
2618 SDValue CNTB_result =
2619 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2621 SDValue CNTB_rescopy =
2622 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
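// Accumulate the four per-byte counts into the low byte:
//   x += x >> 16;  x += x >> 8;  popcount = x & 0xff
// e.g. per-byte counts of 0x01020304 fold down to 0x0a (= 1+2+3+4).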
2625 DAG.getNode(ISD::SRL, MVT::i32,
2626 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2629 DAG.getNode(ISD::ADD, MVT::i32,
2630 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2632 SDValue Sum1_rescopy =
2633 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2636 DAG.getNode(ISD::SRL, MVT::i32,
2637 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2640 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2641 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2643 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2653 /// LowerOperation - Provide custom lowering hooks for some operations.
2656 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2658 unsigned Opc = (unsigned) Op.getOpcode();
2659 MVT VT = Op.getValueType();
2663 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2664 cerr << "Op.getOpcode() = " << Opc << "\n";
2665 cerr << "*Op.getNode():\n";
2666 Op.getNode()->dump();
2672 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2674 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2675 case ISD::ConstantPool:
2676 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2677 case ISD::GlobalAddress:
2678 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2679 case ISD::JumpTable:
2680 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2682 return LowerConstant(Op, DAG);
2683 case ISD::ConstantFP:
2684 return LowerConstantFP(Op, DAG);
2686 return LowerBRCOND(Op, DAG);
2687 case ISD::FORMAL_ARGUMENTS:
2688 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2690 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2692 return LowerRET(Op, DAG, getTargetMachine());
2695 // i8, i64 math ops:
2696 case ISD::ZERO_EXTEND:
2697 case ISD::SIGN_EXTEND:
2698 case ISD::ANY_EXTEND:
2707 return LowerI8Math(Op, DAG, Opc);
2708 else if (VT == MVT::i64)
2709 return LowerI64Math(Op, DAG, Opc);
2713 // Vector-related lowering.
2714 case ISD::BUILD_VECTOR:
2715 return LowerBUILD_VECTOR(Op, DAG);
2716 case ISD::SCALAR_TO_VECTOR:
2717 return LowerSCALAR_TO_VECTOR(Op, DAG);
2718 case ISD::VECTOR_SHUFFLE:
2719 return LowerVECTOR_SHUFFLE(Op, DAG);
2720 case ISD::EXTRACT_VECTOR_ELT:
2721 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2722 case ISD::INSERT_VECTOR_ELT:
2723 return LowerINSERT_VECTOR_ELT(Op, DAG);
2725 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2729 return LowerByteImmed(Op, DAG);
2731 // Vector and i8 multiply:
2734 return LowerVectorMUL(Op, DAG);
2735 else if (VT == MVT::i8)
2736 return LowerI8Math(Op, DAG, Opc);
2738 return LowerMUL(Op, DAG, VT, Opc);
2741 if (VT == MVT::f32 || VT == MVT::v4f32)
2742 return LowerFDIVf32(Op, DAG);
2743 // else if (Op.getValueType() == MVT::f64)
2744 // return LowerFDIVf64(Op, DAG);
2746 assert(0 && "Calling FDIV on unsupported MVT");
2749 return LowerCTPOP(Op, DAG);
2755 //===----------------------------------------------------------------------===//
2756 // Target Optimization Hooks
2757 //===----------------------------------------------------------------------===//
2760 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2763 TargetMachine &TM = getTargetMachine();
2765 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2766 SelectionDAG &DAG = DCI.DAG;
2767 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2768 SDValue Result; // Initially, NULL result
2770 switch (N->getOpcode()) {
2773 SDValue Op1 = N->getOperand(1);
2775 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2776 SDValue Op01 = Op0.getOperand(1);
2777 if (Op01.getOpcode() == ISD::Constant
2778 || Op01.getOpcode() == ISD::TargetConstant) {
2779 // (add (SPUindirect <arg>, <const>), <const>) ->
2780 // (SPUindirect <arg>, <const + const>)
2781 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2782 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2783 SDValue combinedConst =
2784 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2785 Op0.getValueType());
2787 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2788 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2789 DEBUG(cerr << "With: (SPUindirect <arg>, "
2790 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2791 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2792 Op0.getOperand(0), combinedConst);
2794 } else if (isa<ConstantSDNode>(Op0)
2795 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2796 SDValue Op11 = Op1.getOperand(1);
2797 if (Op11.getOpcode() == ISD::Constant
2798 || Op11.getOpcode() == ISD::TargetConstant) {
2799 // (add <const>, (SPUindirect <arg>, <const>)) ->
2800 // (SPUindirect <arg>, <const + const>)
2801 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2802 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2803 SDValue combinedConst =
2804 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2805 Op0.getValueType());
2807 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2808 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2809 DEBUG(cerr << "With: (SPUindirect <arg>, "
2810 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2812 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2813 Op1.getOperand(0), combinedConst);
2818 case ISD::SIGN_EXTEND:
2819 case ISD::ZERO_EXTEND:
2820 case ISD::ANY_EXTEND: {
2821 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2822 N->getValueType(0) == Op0.getValueType()) {
2823 // (any_extend (SPUextract_elt0 <arg>)) ->
2824 // (SPUextract_elt0 <arg>)
2825 // Types must match, however...
2826 DEBUG(cerr << "Replace: ");
2827 DEBUG(N->dump(&DAG));
2828 DEBUG(cerr << "\nWith: ");
2829 DEBUG(Op0.getNode()->dump(&DAG));
2830 DEBUG(cerr << "\n");
2836 case SPUISD::IndirectAddr: {
2837 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2838 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2839 if (CN->getZExtValue() == 0) {
2840 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2841 // (SPUaform <addr>, 0)
2843 DEBUG(cerr << "Replace: ");
2844 DEBUG(N->dump(&DAG));
2845 DEBUG(cerr << "\nWith: ");
2846 DEBUG(Op0.getNode()->dump(&DAG));
2847 DEBUG(cerr << "\n");
2854 case SPUISD::SHLQUAD_L_BITS:
2855 case SPUISD::SHLQUAD_L_BYTES:
2856 case SPUISD::VEC_SHL:
2857 case SPUISD::VEC_SRL:
2858 case SPUISD::VEC_SRA:
2859 case SPUISD::ROTQUAD_RZ_BYTES:
2860 case SPUISD::ROTQUAD_RZ_BITS: {
2861 SDValue Op1 = N->getOperand(1);
2863 if (isa<ConstantSDNode>(Op1)) {
2864 // Kill degenerate vector shifts:
2865 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2867 if (CN->getZExtValue() == 0) {
2873 case SPUISD::PROMOTE_SCALAR: {
2874 switch (Op0.getOpcode()) {
2877 case ISD::ANY_EXTEND:
2878 case ISD::ZERO_EXTEND:
2879 case ISD::SIGN_EXTEND: {
2880 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2882 // but only if the SPUpromote_scalar and <arg> types match.
2883 SDValue Op00 = Op0.getOperand(0);
2884 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2885 SDValue Op000 = Op00.getOperand(0);
2886 if (Op000.getValueType() == N->getValueType(0)) {
2892 case SPUISD::EXTRACT_ELT0: {
2893 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2895 Result = Op0.getOperand(0);
2902 // Otherwise, return unchanged.
2904 if (Result.getNode()) {
2905 DEBUG(cerr << "\nReplace.SPU: ");
2906 DEBUG(N->dump(&DAG));
2907 DEBUG(cerr << "\nWith: ");
2908 DEBUG(Result.getNode()->dump(&DAG));
2909 DEBUG(cerr << "\n");
2916 //===----------------------------------------------------------------------===//
2917 // Inline Assembly Support
2918 //===----------------------------------------------------------------------===//
2920 /// getConstraintType - Given a constraint letter, return the type of
2921 /// constraint it is for this target.
2922 SPUTargetLowering::ConstraintType
2923 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2924 if (ConstraintLetter.size() == 1) {
2925 switch (ConstraintLetter[0]) {
2932 return C_RegisterClass;
2935 return TargetLowering::getConstraintType(ConstraintLetter);
2938 std::pair<unsigned, const TargetRegisterClass*>
2939 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2942 if (Constraint.size() == 1) {
2943 // GCC RS6000 Constraint Letters
2944 switch (Constraint[0]) {
2948 return std::make_pair(0U, SPU::R64CRegisterClass);
2949 return std::make_pair(0U, SPU::R32CRegisterClass);
2952 return std::make_pair(0U, SPU::R32FPRegisterClass);
2953 else if (VT == MVT::f64)
2954 return std::make_pair(0U, SPU::R64FPRegisterClass);
2957 return std::make_pair(0U, SPU::GPRCRegisterClass);
2961 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2964 //! Compute used/known bits for a SPU operand
2966 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2970 const SelectionDAG &DAG,
2971 unsigned Depth ) const {
2973 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2976 switch (Op.getOpcode()) {
2978 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2988 case SPUISD::PROMOTE_SCALAR: {
2989 SDValue Op0 = Op.getOperand(0);
2990 MVT Op0VT = Op0.getValueType();
2991 unsigned Op0VTBits = Op0VT.getSizeInBits();
2992 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2993 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2994 KnownOne |= APInt(Op0VTBits, InMask, false);
2998 case SPUISD::LDRESULT:
2999 case SPUISD::EXTRACT_ELT0:
3000 case SPUISD::EXTRACT_ELT0_CHAINED: {
3001 MVT OpVT = Op.getValueType();
3002 unsigned OpVTBits = OpVT.getSizeInBits();
3003 uint64_t InMask = OpVT.getIntegerVTBitMask();
3004 KnownZero |= APInt(OpVTBits, ~InMask, false);
3005 KnownOne |= APInt(OpVTBits, InMask, false);
3010 case EXTRACT_I1_ZEXT:
3011 case EXTRACT_I1_SEXT:
3012 case EXTRACT_I8_ZEXT:
3013 case EXTRACT_I8_SEXT:
3018 case SPUISD::SHLQUAD_L_BITS:
3019 case SPUISD::SHLQUAD_L_BYTES:
3020 case SPUISD::VEC_SHL:
3021 case SPUISD::VEC_SRL:
3022 case SPUISD::VEC_SRA:
3023 case SPUISD::VEC_ROTL:
3024 case SPUISD::VEC_ROTR:
3025 case SPUISD::ROTQUAD_RZ_BYTES:
3026 case SPUISD::ROTQUAD_RZ_BITS:
3027 case SPUISD::ROTBYTES_RIGHT_S:
3028 case SPUISD::ROTBYTES_LEFT:
3029 case SPUISD::ROTBYTES_LEFT_CHAINED:
3030 case SPUISD::SELECT_MASK:
3032 case SPUISD::FPInterp:
3033 case SPUISD::FPRecipEst:
3034 case SPUISD::SEXT32TO64:
3039 // LowerAsmOperandForConstraint
3041 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3042 char ConstraintLetter,
3044 std::vector<SDValue> &Ops,
3045 SelectionDAG &DAG) const {
3046 // Default, for the time being, to the base class handler
3047 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3051 /// isLegalAddressImmediate - Return true if the integer value can be used
3052 /// as the offset of the target addressing mode.
3053 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3054 const Type *Ty) const {
3055 // SPU's local store is 256K, so addresses fit in 18 bits:
3056 return (V > -(1 << 18) && V < (1 << 18) - 1);
3059 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {