1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
42   const MVT   valtype;
43   const int   prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85     address, external symbol, constant pool) or an A-form address.
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170   // Custom lower BRCOND for i1 and i8 to "promote" the branch condition to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182   // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FSIN , MVT::f32, Expand);
193 setOperationAction(ISD::FCOS , MVT::f32, Expand);
194 setOperationAction(ISD::FREM , MVT::f32, Expand);
196   // No full hardware square root; expand FSQRT
197 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
198 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203 // SPU can do rotate right and left, so legalize it... but customize for i8
204 // because instructions don't exist.
206 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
208 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
209 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
210 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
212 setOperationAction(ISD::ROTL, MVT::i32, Legal);
213 setOperationAction(ISD::ROTL, MVT::i16, Legal);
214 setOperationAction(ISD::ROTL, MVT::i8, Custom);
215 // SPU has no native version of shift left/right for i8
216 setOperationAction(ISD::SHL, MVT::i8, Custom);
217 setOperationAction(ISD::SRL, MVT::i8, Custom);
218 setOperationAction(ISD::SRA, MVT::i8, Custom);
219 // And SPU needs custom lowering for shift left/right for i64
220 setOperationAction(ISD::SHL, MVT::i64, Custom);
221 setOperationAction(ISD::SRL, MVT::i64, Custom);
222 setOperationAction(ISD::SRA, MVT::i64, Custom);
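  // (SPU only provides 32-bit element and quadword shifts, so 64-bit shifts
  // are synthesized from those during custom lowering.)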
224 // Custom lower i8, i32 and i64 multiplications
225 setOperationAction(ISD::MUL, MVT::i8, Custom);
226 setOperationAction(ISD::MUL, MVT::i32, Custom);
227 setOperationAction(ISD::MUL, MVT::i64, Custom);
229 // Need to custom handle (some) common i8, i64 math ops
230 setOperationAction(ISD::ADD, MVT::i64, Custom);
231 setOperationAction(ISD::SUB, MVT::i8, Custom);
232 setOperationAction(ISD::SUB, MVT::i64, Custom);
234   // SPU does not have BSWAP. It does have i32 CTLZ support.
235 // CTPOP has to be custom lowered.
236 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
237 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
239 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
240 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
241 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
242 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
244 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
245 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
247 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
249 // SPU has a version of select that implements (a&~c)|(b&c), just like
250 // select ought to work:
251 setOperationAction(ISD::SELECT, MVT::i1, Promote);
252 setOperationAction(ISD::SELECT, MVT::i8, Legal);
253 setOperationAction(ISD::SELECT, MVT::i16, Legal);
254 setOperationAction(ISD::SELECT, MVT::i32, Legal);
255 setOperationAction(ISD::SELECT, MVT::i64, Expand);
257 setOperationAction(ISD::SETCC, MVT::i1, Promote);
258 setOperationAction(ISD::SETCC, MVT::i8, Legal);
259 setOperationAction(ISD::SETCC, MVT::i16, Legal);
260 setOperationAction(ISD::SETCC, MVT::i32, Legal);
261 setOperationAction(ISD::SETCC, MVT::i64, Expand);
263   // Zero extension and sign extension for i64 have to be custom legalized
265 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
266 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
267 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
269 // SPU has a legal FP -> signed INT instruction
270 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
273 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
275 // FDIV on SPU requires custom lowering
276 setOperationAction(ISD::FDIV, MVT::f32, Custom);
277 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
279 // SPU has [U|S]INT_TO_FP
280 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
281 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
284 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
285 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
286 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
289 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
290 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
291 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
292 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
294 // We cannot sextinreg(i1). Expand to shifts.
295 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
297 // Support label based line numbers.
298 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
299 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
301 // We want to legalize GlobalAddress and ConstantPool nodes into the
302 // appropriate instructions to materialize the address.
303 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
305 MVT VT = (MVT::SimpleValueType)sctype;
307 setOperationAction(ISD::GlobalAddress, VT, Custom);
308 setOperationAction(ISD::ConstantPool, VT, Custom);
309 setOperationAction(ISD::JumpTable, VT, Custom);
312 // RET must be custom lowered, to meet ABI requirements
313 setOperationAction(ISD::RET, MVT::Other, Custom);
315 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
316 setOperationAction(ISD::VASTART , MVT::Other, Custom);
318 // Use the default implementation.
319 setOperationAction(ISD::VAARG , MVT::Other, Expand);
320 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
321 setOperationAction(ISD::VAEND , MVT::Other, Expand);
322 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
323 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
324 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
325 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
327 // Cell SPU has instructions for converting between i64 and fp.
328 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
329 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
331 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
332 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
334 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
335 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
337 // First set operation action for all vector types to expand. Then we
338 // will selectively turn on ones that can be effectively codegen'd.
339 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
341 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
342 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
343 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
344 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
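  // All vector types live in the single 128-bit VECREG register class; the
  // element type only affects how operations on them are legalized below.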
346 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
347 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
348 MVT VT = (MVT::SimpleValueType)i;
350 // add/sub are legal for all supported vector VT's.
351 setOperationAction(ISD::ADD , VT, Legal);
352 setOperationAction(ISD::SUB , VT, Legal);
353 // mul has to be custom lowered.
354 setOperationAction(ISD::MUL , VT, Custom);
356 setOperationAction(ISD::AND , VT, Legal);
357 setOperationAction(ISD::OR , VT, Legal);
358 setOperationAction(ISD::XOR , VT, Legal);
359 setOperationAction(ISD::LOAD , VT, Legal);
360 setOperationAction(ISD::SELECT, VT, Legal);
361 setOperationAction(ISD::STORE, VT, Legal);
363 // These operations need to be expanded:
364 setOperationAction(ISD::SDIV, VT, Expand);
365 setOperationAction(ISD::SREM, VT, Expand);
366 setOperationAction(ISD::UDIV, VT, Expand);
367 setOperationAction(ISD::UREM, VT, Expand);
368 setOperationAction(ISD::FDIV, VT, Custom);
370 // Custom lower build_vector, constant pool spills, insert and
371 // extract vector elements:
372 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
373 setOperationAction(ISD::ConstantPool, VT, Custom);
374 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
375 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
376 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
377 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
380 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
381 setOperationAction(ISD::AND, MVT::v16i8, Custom);
382 setOperationAction(ISD::OR, MVT::v16i8, Custom);
383 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
384 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
386 setShiftAmountType(MVT::i32);
387 setSetCCResultContents(ZeroOrOneSetCCResult);
389 setStackPointerRegisterToSaveRestore(SPU::R1);
391 // We have target-specific dag combine patterns for the following nodes:
392 setTargetDAGCombine(ISD::ADD);
393 setTargetDAGCombine(ISD::ZERO_EXTEND);
394 setTargetDAGCombine(ISD::SIGN_EXTEND);
395 setTargetDAGCombine(ISD::ANY_EXTEND);
397 computeRegisterProperties();
401 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
403 if (node_names.empty()) {
404 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
405 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
406 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
407 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
408 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
409 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
410 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
411 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
412 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
413 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
414 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
415 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
416 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
417 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
418 = "SPUISD::EXTRACT_ELT0_CHAINED";
419 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
420 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
421 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
422 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
423 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
424 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
425 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
426 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
427 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
428 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
429 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
430 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
431 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
432 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
433 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
434 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
435 "SPUISD::ROTQUAD_RZ_BYTES";
436 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
437 "SPUISD::ROTQUAD_RZ_BITS";
438 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
439 "SPUISD::ROTBYTES_RIGHT_S";
440 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
441 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
442 "SPUISD::ROTBYTES_LEFT_CHAINED";
443 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
444 "SPUISD::ROTBYTES_LEFT_BITS";
445 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
446 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
447 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
448 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
449 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
450 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
451 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
452 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
453 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
456 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
458 return ((i != node_names.end()) ? i->second : 0);
461 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
462 MVT VT = Op.getValueType();
469 //===----------------------------------------------------------------------===//
470 // Calling convention code:
471 //===----------------------------------------------------------------------===//
473 #include "SPUGenCallingConv.inc"
475 //===----------------------------------------------------------------------===//
476 // LowerOperation implementation
477 //===----------------------------------------------------------------------===//
479 /// Aligned load common code for CellSPU
481 \param[in] Op The SelectionDAG load or store operand
482 \param[in] DAG The selection DAG
483 \param[in] ST CellSPU subtarget information structure
484 \param[in,out] alignment Caller initializes this to the load or store node's
485 value from getAlignment(), may be updated while generating the aligned load
486 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
487  offset (a multiple of 16)
488 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
489 offset of the preferred slot (modulo 16 != 0)
490  \param[in,out] VT Caller initializes this value type to the load or store
491  node's loaded or stored value type; may be updated if an i1-extended load or store is encountered
493 \param[out] was16aligned true if the base pointer had 16-byte alignment,
494 otherwise false. Can help to determine if the chunk needs to be rotated.
496 Both load and store lowering load a block of data aligned on a 16-byte
497 boundary. This is the common aligned load code shared between both.
500 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
502             LSBaseSDNode *LSN, unsigned &alignment, int &alignOffs, int &prefSlotOffs,
503 MVT &VT, bool &was16aligned)
505 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
506 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
507 SDValue basePtr = LSN->getBasePtr();
508 SDValue chain = LSN->getChain();
510 if (basePtr.getOpcode() == ISD::ADD) {
511 SDValue Op1 = basePtr.getNode()->getOperand(1);
513 if (Op1.getOpcode() == ISD::Constant
514 || Op1.getOpcode() == ISD::TargetConstant) {
515 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
517 alignOffs = (int) CN->getZExtValue();
518 prefSlotOffs = (int) (alignOffs & 0xf);
520 // Adjust the rotation amount to ensure that the final result ends up in
521 // the preferred slot:
522 prefSlotOffs -= vtm->prefslot_byte;
523 basePtr = basePtr.getOperand(0);
525 // Loading from memory, can we adjust alignment?
526 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
527 SDValue APtr = basePtr.getOperand(0);
528 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
529 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
530 alignment = GSDN->getGlobal()->getAlignment();
535 prefSlotOffs = -vtm->prefslot_byte;
537 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
538 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
539 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
540 prefSlotOffs = (int) (alignOffs & 0xf);
541 prefSlotOffs -= vtm->prefslot_byte;
542 basePtr = DAG.getRegister(SPU::R1, VT);
545 prefSlotOffs = -vtm->prefslot_byte;
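  // If the access is known to be 16-byte aligned, the enclosing quadword can
  // be addressed directly with a D-form (register + small immediate) address;
  // otherwise fall through to the pessimistic indirect-addressing path below.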
548 if (alignment == 16) {
549 // Realign the base pointer as a D-Form address:
550 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
551 basePtr = DAG.getNode(ISD::ADD, PtrVT,
553 DAG.getConstant((alignOffs & ~0xf), PtrVT));
556 // Emit the vector load:
558 return DAG.getLoad(MVT::v16i8, chain, basePtr,
559 LSN->getSrcValue(), LSN->getSrcValueOffset(),
560 LSN->isVolatile(), 16);
563 // Unaligned load or we're using the "large memory" model, which means that
564 // we have to be very pessimistic:
565 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
566 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
567 DAG.getConstant(0, PtrVT));
571 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
572 DAG.getConstant((alignOffs & ~0xf), PtrVT));
573 was16aligned = false;
574 return DAG.getLoad(MVT::v16i8, chain, basePtr,
575 LSN->getSrcValue(), LSN->getSrcValueOffset(),
576 LSN->isVolatile(), 16);
579 /// Custom lower loads for CellSPU
581 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
582 within a 16-byte block, we have to rotate to extract the requested element.
585 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
586 LoadSDNode *LN = cast<LoadSDNode>(Op);
587 SDValue the_chain = LN->getChain();
588 MVT VT = LN->getMemoryVT();
589 MVT OpVT = Op.getNode()->getValueType(0);
590 ISD::LoadExtType ExtType = LN->getExtensionType();
591 unsigned alignment = LN->getAlignment();
594 switch (LN->getAddressingMode()) {
595 case ISD::UNINDEXED: {
599 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
601 if (result.getNode() == 0)
604 the_chain = result.getValue(1);
605 // Rotate the chunk if necessary
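      // When the quadword was loaded from a known 16-byte aligned address, the
      // rotate amount is a compile-time byte count; otherwise it is computed
      // from the (unaligned) base pointer, whose low bits give the byte offset
      // of the value within the containing quadword.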
608 if (rotamt != 0 || !was16aligned) {
609 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
614 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
616 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
617 LoadSDNode *LN1 = cast<LoadSDNode>(result);
618 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
619 DAG.getConstant(rotamt, PtrVT));
622 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
623 the_chain = result.getValue(1);
626 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
628 MVT vecVT = MVT::v16i8;
630 // Convert the loaded v16i8 vector to the appropriate vector type
631 // specified by the operand:
634 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
636 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
639 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
640 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
641 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
642 the_chain = result.getValue(1);
644 // Handle the sign and zero-extending loads for i1 and i8:
647 if (ExtType == ISD::SEXTLOAD) {
648 NewOpC = (OpVT == MVT::i1
649 ? SPUISD::EXTRACT_I1_SEXT
650 : SPUISD::EXTRACT_I8_SEXT);
652 assert(ExtType == ISD::ZEXTLOAD);
653 NewOpC = (OpVT == MVT::i1
654 ? SPUISD::EXTRACT_I1_ZEXT
655 : SPUISD::EXTRACT_I8_ZEXT);
658 result = DAG.getNode(NewOpC, OpVT, result);
661 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
662 SDValue retops[2] = {
667 result = DAG.getNode(SPUISD::LDRESULT, retvts,
668 retops, sizeof(retops) / sizeof(retops[0]));
675 case ISD::LAST_INDEXED_MODE:
676     cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than UNINDEXED\n";
678 cerr << (unsigned) LN->getAddressingMode() << "\n";
686 /// Custom lower stores for CellSPU
688 All CellSPU stores are aligned to 16-byte boundaries, so for elements
689 within a 16-byte block, we have to generate a shuffle to insert the
690 requested element into its place, then store the resulting block.
693 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
694 StoreSDNode *SN = cast<StoreSDNode>(Op);
695 SDValue Value = SN->getValue();
696 MVT VT = Value.getValueType();
697 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
698 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
699 unsigned alignment = SN->getAlignment();
701 switch (SN->getAddressingMode()) {
702 case ISD::UNINDEXED: {
703 int chunk_offset, slot_offset;
706 // The vector type we really want to load from the 16-byte chunk, except
707 // in the case of MVT::i1, which has to be v16i8.
708 MVT vecVT, stVecVT = MVT::v16i8;
711 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
712 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
714 SDValue alignLoadVec =
715 AlignedLoad(Op, DAG, ST, SN, alignment,
716 chunk_offset, slot_offset, VT, was16aligned);
718 if (alignLoadVec.getNode() == 0)
721 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
722 SDValue basePtr = LN->getBasePtr();
723 SDValue the_chain = alignLoadVec.getValue(1);
724 SDValue theValue = SN->getValue();
728 && (theValue.getOpcode() == ISD::AssertZext
729 || theValue.getOpcode() == ISD::AssertSext)) {
730       // Drill down and get the value for zero- and sign-extended quantities.
732 theValue = theValue.getOperand(0);
737 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
738 SDValue insertEltPtr;
741 // If the base pointer is already a D-form address, then just create
742     // a new D-form address with a slot offset and the original base pointer.
743 // Otherwise generate a D-form address with the slot offset relative
744 // to the stack pointer, which is always aligned.
745 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
746 DEBUG(basePtr.getNode()->dump(&DAG));
749 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
750 (basePtr.getOpcode() == ISD::ADD
751 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
752 insertEltPtr = basePtr;
754 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
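    // INSERT_MASK (one of the C*D instructions) builds a shuffle control word
    // from the low bits of the element address; SHUFB then merges the scalar
    // value into the previously loaded quadword so that only the addressed
    // slot is replaced.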
757 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
758 result = DAG.getNode(SPUISD::SHUFB, vecVT,
759 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
761 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
763 result = DAG.getStore(the_chain, result, basePtr,
764 LN->getSrcValue(), LN->getSrcValueOffset(),
765 LN->isVolatile(), LN->getAlignment());
774 case ISD::LAST_INDEXED_MODE:
775     cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than UNINDEXED\n";
777 cerr << (unsigned) SN->getAddressingMode() << "\n";
785 /// Generate the address of a constant pool entry.
787 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
788 MVT PtrVT = Op.getValueType();
789 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
790 Constant *C = CP->getConstVal();
791 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
792 SDValue Zero = DAG.getConstant(0, PtrVT);
793 const TargetMachine &TM = DAG.getTarget();
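  // With static relocation and the default (small) memory model, the constant
  // pool entry is reachable through an A-form absolute address. Under the
  // "large memory" model, the address is instead built as a Hi/Lo pair and
  // combined through an indirect address node.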
795 if (TM.getRelocationModel() == Reloc::Static) {
796 if (!ST->usingLargeMem()) {
797 // Just return the SDValue with the constant pool address in it.
798 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
800 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
801 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
802 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
807 "LowerConstantPool: Relocation model other than static"
813 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
814 MVT PtrVT = Op.getValueType();
815 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
816 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
817 SDValue Zero = DAG.getConstant(0, PtrVT);
818 const TargetMachine &TM = DAG.getTarget();
820 if (TM.getRelocationModel() == Reloc::Static) {
821 if (!ST->usingLargeMem()) {
822 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
824 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
825 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
826 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
831 "LowerJumpTable: Relocation model other than static not supported.");
836 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
837 MVT PtrVT = Op.getValueType();
838 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
839 GlobalValue *GV = GSDN->getGlobal();
840 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
841 const TargetMachine &TM = DAG.getTarget();
842 SDValue Zero = DAG.getConstant(0, PtrVT);
844 if (TM.getRelocationModel() == Reloc::Static) {
845 if (!ST->usingLargeMem()) {
846 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
848 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
849 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
850 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
853 cerr << "LowerGlobalAddress: Relocation model other than static not "
862 //! Custom lower i64 integer constants
864 This code inserts all of the necessary juggling that needs to occur to load
865 a 64-bit constant into a register.
868 LowerConstant(SDValue Op, SelectionDAG &DAG) {
869 MVT VT = Op.getValueType();
870 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
872 if (VT == MVT::i64) {
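    // Materialize the 64-bit constant as a v2i64 splat and extract the
    // preferred slot, reusing the vector constant-building path rather than
    // synthesizing the value with scalar instructions.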
873 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
874 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
875 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
877 cerr << "LowerConstant: unhandled constant type "
887 //! Custom lower double precision floating point constants
889 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
890 MVT VT = Op.getValueType();
891 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
894 "LowerConstantFP: Node is not ConstantFPSDNode");
896 if (VT == MVT::f64) {
897 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
898 return DAG.getNode(ISD::BIT_CONVERT, VT,
899 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
905 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
907 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
909 SDValue Cond = Op.getOperand(1);
910 MVT CondVT = Cond.getValueType();
913 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
914 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
915 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
917 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
920 return SDValue(); // Unchanged
924 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
926 MachineFunction &MF = DAG.getMachineFunction();
927 MachineFrameInfo *MFI = MF.getFrameInfo();
928 MachineRegisterInfo &RegInfo = MF.getRegInfo();
929 SmallVector<SDValue, 8> ArgValues;
930 SDValue Root = Op.getOperand(0);
931 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
933 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
934 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
936 unsigned ArgOffset = SPUFrameInfo::minStackSize();
937 unsigned ArgRegIdx = 0;
938 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
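  // Note: CellSPU argument stack slots are quadword (16-byte) sized, so every
  // argument spilled to memory advances ArgOffset by a full slot regardless of
  // the argument's actual size.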
940 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
942 // Add DAG nodes to load the arguments or copy them out of registers.
943 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
944 ArgNo != e; ++ArgNo) {
946 bool needsLoad = false;
947 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
948 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
950 switch (ObjectVT.getSimpleVT()) {
952 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
953 << ObjectVT.getMVTString()
958 if (!isVarArg && ArgRegIdx < NumArgRegs) {
959 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
960 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
961 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1008 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1010 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1023 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1024 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1025 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1026 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1034 // We need to load the argument to a virtual register if we determined above
1035 // that we ran out of physical registers of the appropriate type
1037 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1038 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1039 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1040 ArgOffset += StackSlotSize;
1043 ArgValues.push_back(ArgVal);
1046   // If the function takes a variable number of arguments, make a frame index for
1047 // the start of the first vararg value... for expansion of llvm.va_start.
1049 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1051 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1052 // If this function is vararg, store any remaining integer argument regs to
1053   // their spots on the stack so that they may be loaded by dereferencing the
1054 // result of va_next.
1055 SmallVector<SDValue, 8> MemOps;
1056 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1057 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1058 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1059 SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1060 SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1061 MemOps.push_back(Store);
1062 // Increment the address by four for the next argument to store
1063 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1064 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1066 if (!MemOps.empty())
1067 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1070 ArgValues.push_back(Root);
1072 // Return the new list of results.
1073 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1077 /// isLSAAddress - Return the immediate to use if the specified
1078 /// value is representable as an LSA address.
1079 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1080 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1083 int Addr = C->getZExtValue();
1084 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1085 (Addr << 14 >> 14) != Addr)
1086 return 0; // Top 14 bits have to be sext of immediate.
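  // Otherwise the address fits the LSA immediate field; return it scaled down
  // to a word index (the low two bits are implicitly zero).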
1088 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1093 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1094 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1095 SDValue Chain = TheCall->getChain();
1097 bool isVarArg = TheCall->isVarArg();
1098 bool isTailCall = TheCall->isTailCall();
1100 SDValue Callee = TheCall->getCallee();
1101 unsigned NumOps = TheCall->getNumArgs();
1102 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1103 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1104 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1106 // Handy pointer type
1107 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1109 // Accumulate how many bytes are to be pushed on the stack, including the
1110   // linkage area and parameter passing area. According to the SPU ABI,
1111 // we minimally need space for [LR] and [SP]
1112 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1114 // Set up a copy of the stack pointer for use loading and storing any
1115   // arguments that may not fit in the registers available for argument passing.
1117 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1119   // Figure out which arguments are going to go in registers, and which in memory.
1121 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1122 unsigned ArgRegIdx = 0;
1124 // Keep track of registers passing arguments
1125 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1126 // And the arguments passed on the stack
1127 SmallVector<SDValue, 8> MemOpChains;
1129 for (unsigned i = 0; i != NumOps; ++i) {
1130 SDValue Arg = TheCall->getArg(i);
1132 // PtrOff will be used to store the current argument to the stack if a
1133 // register cannot be found for it.
1134 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1135 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1137 switch (Arg.getValueType().getSimpleVT()) {
1138 default: assert(0 && "Unexpected ValueType for argument!");
1142 if (ArgRegIdx != NumArgRegs) {
1143 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1145 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1146 ArgOffset += StackSlotSize;
1151 if (ArgRegIdx != NumArgRegs) {
1152 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1155 ArgOffset += StackSlotSize;
1162 if (ArgRegIdx != NumArgRegs) {
1163 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1165 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1166 ArgOffset += StackSlotSize;
1172 // Update number of stack bytes actually used, insert a call sequence start
1173 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1174 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1177 if (!MemOpChains.empty()) {
1178 // Adjust the stack pointer for the stack arguments.
1179 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1180 &MemOpChains[0], MemOpChains.size());
1183 // Build a sequence of copy-to-reg nodes chained together with token chain
1184 // and flag operands which copy the outgoing args into the appropriate regs.
1186 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1187 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1189 InFlag = Chain.getValue(1);
1192 SmallVector<SDValue, 8> Ops;
1193 unsigned CallOpc = SPUISD::CALL;
1195 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1196 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1197 // node so that legalize doesn't hack it.
1198 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1199 GlobalValue *GV = G->getGlobal();
1200 MVT CalleeVT = Callee.getValueType();
1201 SDValue Zero = DAG.getConstant(0, PtrVT);
1202 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1204 if (!ST->usingLargeMem()) {
1205 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1206     // style calls; otherwise, external symbols are BRASL calls. This assumes
1207 // that declared/defined symbols are in the same compilation unit and can
1208 // be reached through PC-relative jumps.
1211     // This may be an unsafe assumption for JIT and really large compilation units.
1213 if (GV->isDeclaration()) {
1214 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1216 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1219     // "Large memory" mode: Turn all calls into indirect calls with an X-form address.
1221 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1223 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1224 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1225 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1226 // If this is an absolute destination address that appears to be a legal
1227 // local store address, use the munged value.
1228 Callee = SDValue(Dest, 0);
1231 Ops.push_back(Chain);
1232 Ops.push_back(Callee);
1234   // Add argument registers to the end of the list so that they are known live into the call.
1236 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1237 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1238 RegsToPass[i].second.getValueType()));
1240 if (InFlag.getNode())
1241 Ops.push_back(InFlag);
1242 // Returns a chain and a flag for retval copy to use.
1243 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1244 &Ops[0], Ops.size());
1245 InFlag = Chain.getValue(1);
1247 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1248 DAG.getIntPtrConstant(0, true), InFlag);
1249 if (TheCall->getValueType(0) != MVT::Other)
1250 InFlag = Chain.getValue(1);
1252 SDValue ResultVals[3];
1253 unsigned NumResults = 0;
1255 // If the call has results, copy the values out of the ret val registers.
1256 switch (TheCall->getValueType(0).getSimpleVT()) {
1257 default: assert(0 && "Unexpected ret value!");
1258 case MVT::Other: break;
1260 if (TheCall->getValueType(1) == MVT::i32) {
1261 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1262 ResultVals[0] = Chain.getValue(0);
1263 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1264 Chain.getValue(2)).getValue(1);
1265 ResultVals[1] = Chain.getValue(0);
1268 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1269 ResultVals[0] = Chain.getValue(0);
1274 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1275 ResultVals[0] = Chain.getValue(0);
1280 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1281 InFlag).getValue(1);
1282 ResultVals[0] = Chain.getValue(0);
1290 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1291 InFlag).getValue(1);
1292 ResultVals[0] = Chain.getValue(0);
1297 // If the function returns void, just return the chain.
1298 if (NumResults == 0)
1301 // Otherwise, merge everything together with a MERGE_VALUES node.
1302 ResultVals[NumResults++] = Chain;
1303 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1304 return Res.getValue(Op.getResNo());
1308 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1309 SmallVector<CCValAssign, 16> RVLocs;
1310 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1311 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1312 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1313 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1315 // If this is the first return lowered for this function, add the regs to the
1316 // liveout set for the function.
1317 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1318 for (unsigned i = 0; i != RVLocs.size(); ++i)
1319 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1322 SDValue Chain = Op.getOperand(0);
1325 // Copy the result values into the output registers.
1326 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1327 CCValAssign &VA = RVLocs[i];
1328 assert(VA.isRegLoc() && "Can only return in registers!");
1329 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1330 Flag = Chain.getValue(1);
1334 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1336 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1340 //===----------------------------------------------------------------------===//
1341 // Vector related lowering:
1342 //===----------------------------------------------------------------------===//
1344 static ConstantSDNode *
1345 getVecImm(SDNode *N) {
1346 SDValue OpVal(0, 0);
1348   // Check to see if this buildvec has a single, repeated non-undef value in its elements.
1349 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1350 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1351 if (OpVal.getNode() == 0)
1352 OpVal = N->getOperand(i);
1353 else if (OpVal != N->getOperand(i))
1357 if (OpVal.getNode() != 0) {
1358 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1363 return 0; // All UNDEF: use implicit def.; not Constant node
1366 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1367 /// and the value fits into an unsigned 18-bit constant, and if so, return the constant.
1369 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1371 if (ConstantSDNode *CN = getVecImm(N)) {
1372 uint64_t Value = CN->getZExtValue();
1373 if (ValueType == MVT::i64) {
1374 uint64_t UValue = CN->getZExtValue();
1375 uint32_t upper = uint32_t(UValue >> 32);
1376 uint32_t lower = uint32_t(UValue);
1379 Value = Value >> 32;
1381 if (Value <= 0x3ffff)
1382 return DAG.getConstant(Value, ValueType);
1388 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1389 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1391 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1393 if (ConstantSDNode *CN = getVecImm(N)) {
1394 int64_t Value = CN->getSExtValue();
1395 if (ValueType == MVT::i64) {
1396 uint64_t UValue = CN->getZExtValue();
1397 uint32_t upper = uint32_t(UValue >> 32);
1398 uint32_t lower = uint32_t(UValue);
1401 Value = Value >> 32;
1403 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1404 return DAG.getConstant(Value, ValueType);
1411 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1412 /// and the value fits into a signed 10-bit constant, and if so, return the constant.
1414 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1416 if (ConstantSDNode *CN = getVecImm(N)) {
1417 int64_t Value = CN->getSExtValue();
1418 if (ValueType == MVT::i64) {
1419 uint64_t UValue = CN->getZExtValue();
1420 uint32_t upper = uint32_t(UValue >> 32);
1421 uint32_t lower = uint32_t(UValue);
1424 Value = Value >> 32;
1426 if (isS10Constant(Value))
1427 return DAG.getConstant(Value, ValueType);
1433 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1434 /// and the value fits into a signed 8-bit constant, and if so, return the constant.
1437 /// @note: The incoming vector is v16i8 because that's the only way we can load
1438 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same value.
1440 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1442 if (ConstantSDNode *CN = getVecImm(N)) {
1443 int Value = (int) CN->getZExtValue();
1444 if (ValueType == MVT::i16
1445 && Value <= 0xffff /* truncated from uint64_t */
1446 && ((short) Value >> 8) == ((short) Value & 0xff))
1447 return DAG.getConstant(Value & 0xff, ValueType);
1448 else if (ValueType == MVT::i8
1449 && (Value & 0xff) == Value)
1450 return DAG.getConstant(Value, ValueType);
1456 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1457 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1459 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1461 if (ConstantSDNode *CN = getVecImm(N)) {
1462 uint64_t Value = CN->getZExtValue();
1463 if ((ValueType == MVT::i32
1464 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1465 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1466 return DAG.getConstant(Value >> 16, ValueType);
1472 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1473 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1474 if (ConstantSDNode *CN = getVecImm(N)) {
1475 return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1481 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1482 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1483 if (ConstantSDNode *CN = getVecImm(N)) {
1484     return DAG.getConstant(CN->getZExtValue(), MVT::i64);
1490 // If this is a vector of constants or undefs, get the bits. A bit in
1491 // UndefBits is set if the corresponding element of the vector is an
1492 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1493 // zero. Return true if this is not an array of constants, false if it is.
1495 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1496 uint64_t UndefBits[2]) {
1497 // Start with zero'd results.
1498 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1500 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1501 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1502 SDValue OpVal = BV->getOperand(i);
1504     unsigned PartNo = i >= e/2;            // In the upper 64 bits?
1505 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1507 uint64_t EltBits = 0;
1508 if (OpVal.getOpcode() == ISD::UNDEF) {
1509 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1510 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1512 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1513 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1514 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1515 const APFloat &apf = CN->getValueAPF();
1516 EltBits = (CN->getValueType(0) == MVT::f32
1517 ? FloatToBits(apf.convertToFloat())
1518 : DoubleToBits(apf.convertToDouble()));
1520 // Nonconstant element.
1524 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1527 //printf("%llx %llx %llx %llx\n",
1528 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1532 /// If this is a splat (repetition) of a value across the whole vector, return
1533 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1534 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1535 /// SplatSize = 1 byte.
1536 static bool isConstantSplat(const uint64_t Bits128[2],
1537 const uint64_t Undef128[2],
1539 uint64_t &SplatBits, uint64_t &SplatUndef,
1541 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1542 // the same as the lower 64-bits, ignoring undefs.
1543 uint64_t Bits64 = Bits128[0] | Bits128[1];
1544 uint64_t Undef64 = Undef128[0] & Undef128[1];
1545 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1546 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1547 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1548 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
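  // Each narrower candidate is formed by OR'ing the two halves of the wider
  // value (and AND'ing the undef masks), so undefined lanes act as wildcards
  // in the equality comparisons below.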
1550 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1551 if (MinSplatBits < 64) {
1553 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1555 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1556 if (MinSplatBits < 32) {
1558 // If the top 16-bits are different than the lower 16-bits, ignoring
1559 // undefs, we have an i32 splat.
1560 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1561 if (MinSplatBits < 16) {
1562 // If the top 8-bits are different than the lower 8-bits, ignoring
1563 // undefs, we have an i16 splat.
1564 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1565 == ((Bits16 >> 8) & ~Undef16)) {
1566 // Otherwise, we have an 8-bit splat.
1567 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1568 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1574 SplatUndef = Undef16;
1581 SplatUndef = Undef32;
1587 SplatBits = Bits128[0];
1588 SplatUndef = Undef128[0];
1594 return false; // Can't be a splat if two pieces don't match.
1597 // If this is a case we can't handle, return null and let the default
1598 // expansion code take care of it. If we CAN select this case, and if it
1599 // selects to a single instruction, return Op. Otherwise, if we can codegen
1600 // this case more efficiently than a constant pool load, lower it to the
1601 // sequence of ops that should be used.
1602 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1603 MVT VT = Op.getValueType();
1604 // If this is a vector of constants or undefs, get the bits. A bit in
1605 // UndefBits is set if the corresponding element of the vector is an
1606   // ISD::UNDEF value. For undefs, the corresponding VectorBits values are zero.
1608 uint64_t VectorBits[2];
1609 uint64_t UndefBits[2];
1610 uint64_t SplatBits, SplatUndef;
1612 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1613 || !isConstantSplat(VectorBits, UndefBits,
1614 VT.getVectorElementType().getSizeInBits(),
1615 SplatBits, SplatUndef, SplatSize))
1616 return SDValue(); // Not a constant vector, not a splat.
1618 switch (VT.getSimpleVT()) {
1621 uint32_t Value32 = SplatBits;
1622 assert(SplatSize == 4
1623 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1624 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1625 SDValue T = DAG.getConstant(Value32, MVT::i32);
1626 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1627 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1631 uint64_t f64val = SplatBits;
1632 assert(SplatSize == 8
1633 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1634 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1635 SDValue T = DAG.getConstant(f64val, MVT::i64);
1636 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1637 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1641 // 8-bit constants have to be expanded to 16-bits
1642 unsigned short Value16 = SplatBits | (SplatBits << 8);
1644 for (int i = 0; i < 8; ++i)
1645 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1646 return DAG.getNode(ISD::BIT_CONVERT, VT,
1647 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1650 unsigned short Value16;
1652 Value16 = (unsigned short) (SplatBits & 0xffff);
1654 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1655 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1657 for (int i = 0; i < 8; ++i) Ops[i] = T;
1658 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1661 unsigned int Value = SplatBits;
1662 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1663 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1666 uint64_t val = SplatBits;
1667 uint32_t upper = uint32_t(val >> 32);
1668 uint32_t lower = uint32_t(val);
1670 if (upper == lower) {
1671       // Magic constant that can be matched by IL, ILA, et al.
1672 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1673 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1677 SmallVector<SDValue, 16> ShufBytes;
1679 bool upper_special, lower_special;
1681 // NOTE: This code creates common-case shuffle masks that can be easily
1682 // detected as common expressions. It is not attempting to create highly
1683 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
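      // The special selector bytes chosen below follow the shufb control-byte
      // encoding: a selector of the form 10xxxxxx produces 0x00, 110xxxxx
      // produces 0xFF, and 111xxxxx produces 0x80 in the corresponding result
      // byte.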
1685 // Detect if the upper or lower half is a special shuffle mask pattern:
1686 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1687 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1689 // Create lower vector if not a special pattern
1690 if (!lower_special) {
1691 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1692 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1693 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1694 LO32C, LO32C, LO32C, LO32C));
1697 // Create upper vector if not a special pattern
1698 if (!upper_special) {
1699 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1700 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1701 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1702 HI32C, HI32C, HI32C, HI32C));
1705 // If either upper or lower are special, then the two input operands are
1706 // the same (basically, one of them is a "don't care")
1711 if (lower_special && upper_special) {
1712       // Unhappy situation... both upper and lower are special, so punt with
1713       // a zero vector:
1714 SDValue Zero = DAG.getConstant(0, MVT::i32);
1715 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1719 for (int i = 0; i < 4; ++i) {
1720 uint64_t val = 0;
1721 for (int j = 0; j < 4; ++j) {
1723 bool process_upper, process_lower;
1724 val <<= 8;
1725 process_upper = (upper_special && (i & 1) == 0);
1726 process_lower = (lower_special && (i & 1) == 1);
1728 if (process_upper || process_lower) {
1729 if ((process_upper && upper == 0)
1730 || (process_lower && lower == 0))
1731 val |= 0x80;
1732 else if ((process_upper && upper == 0xffffffff)
1733 || (process_lower && lower == 0xffffffff))
1734 val |= 0xc0;
1735 else if ((process_upper && upper == 0x80000000)
1736 || (process_lower && lower == 0x80000000))
1737 val |= (j == 0 ? 0xe0 : 0x80);
1738 } else
1739 val |= i * 4 + j + ((i & 1) * 16);
1740 }
1742 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1743 }
1745 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1746 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1747 &ShufBytes[0], ShufBytes.size()));
1755 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1756 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1757 /// permutation vector, V3, is monotonically increasing with one "exception"
1758 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1759 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1760 /// In either case, the net result eventually invokes SHUFB to
1761 /// permute/shuffle the bytes from V1 and V2.
1763 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1764 /// generate a control word for byte/halfword/word insertion. This takes care
1765 /// of a single element move from V2 into V1.
1767 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
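// Illustrative example (hypothetical input): shuffling two v4i32 vectors with
// the permutation mask (0, 1, 6, 3) is monotonic except for slot 2, which
// names element 2 of V2; that single exception is what the INSERT_MASK/C*D
// path below handles, while any other pattern falls back to a full SHUFB
// byte permutation.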
1768 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1769 SDValue V1 = Op.getOperand(0);
1770 SDValue V2 = Op.getOperand(1);
1771 SDValue PermMask = Op.getOperand(2);
1773 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1775 // If we have a single element being moved from V1 to V2, this can be handled
1776 // using the C*[DX] compute mask instructions, but the vector elements have
1777 // to be monotonically increasing with one exception element.
1778 MVT EltVT = V1.getValueType().getVectorElementType();
1779 unsigned EltsFromV2 = 0;
1780 unsigned V2Elt = 0;
1781 unsigned V2EltIdx0 = 0;
1782 unsigned CurrElt = 0;
1783 bool monotonic = true;
1784 if (EltVT == MVT::i8)
1785 V2EltIdx0 = 16;
1786 else if (EltVT == MVT::i16)
1787 V2EltIdx0 = 8;
1788 else if (EltVT == MVT::i32)
1789 V2EltIdx0 = 4;
1790 else
1791 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1793 for (unsigned i = 0, e = PermMask.getNumOperands();
1794 EltsFromV2 <= 1 && monotonic && i != e;
1795 ++i) {
1796 unsigned SrcElt;
1797 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1798 SrcElt = 0;
1799 else
1800 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1802 if (SrcElt >= V2EltIdx0) {
1803 ++EltsFromV2;
1804 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1805 } else if (CurrElt != SrcElt) {
1806 monotonic = false;
1807 }
1809 ++CurrElt;
1810 }
1812 if (EltsFromV2 == 1 && monotonic) {
1813 // Compute mask and shuffle
1814 MachineFunction &MF = DAG.getMachineFunction();
1815 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1816 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1817 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1818 // Initialize temporary register to 0
1819 SDValue InitTempReg =
1820 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1821 // Copy register's contents as index in INSERT_MASK:
1822 SDValue ShufMaskOp =
1823 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1824 DAG.getTargetConstant(V2Elt, MVT::i32),
1825 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1826 // Use shuffle mask in SHUFB synthetic instruction:
1827 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1829 // Convert the VECTOR_SHUFFLE mask's input element units to the
1830 // actual bytes.
1831 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1833 SmallVector<SDValue, 16> ResultMask;
1834 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1835 unsigned SrcElt;
1836 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1837 SrcElt = 0;
1838 else
1839 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1841 for (unsigned j = 0; j < BytesPerElement; ++j) {
1842 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1843 MVT::i8));
1844 }
1845 }
1847 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1848 &ResultMask[0], ResultMask.size());
1849 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1853 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1854 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1856 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1857 // For a constant, build the appropriate constant vector, which will
1858 // eventually simplify to a vector register load.
1860 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1861 SmallVector<SDValue, 16> ConstVecValues;
1862 MVT VT;
1863 size_t n_copies;
1865 // Create a constant vector:
1866 switch (Op.getValueType().getSimpleVT()) {
1867 default: assert(0 && "Unexpected constant value type in "
1868 "LowerSCALAR_TO_VECTOR");
1869 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1870 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1871 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1872 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1873 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1874 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1877 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1878 for (size_t j = 0; j < n_copies; ++j)
1879 ConstVecValues.push_back(CValue);
1881 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1882 &ConstVecValues[0], ConstVecValues.size());
1884 // Otherwise, copy the value from one register to another:
1885 switch (Op0.getValueType().getSimpleVT()) {
1886 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1893 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1900 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1901 switch (Op.getValueType().getSimpleVT()) {
1903 cerr << "CellSPU: Unknown vector multiplication, got "
1904 << Op.getValueType().getMVTString()
1910 SDValue rA = Op.getOperand(0);
1911 SDValue rB = Op.getOperand(1);
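// The SPU only has 16x16->32 multiplier hardware, so a full 32-bit multiply
// is assembled from partial products: mpyu(a,b) = lo16(a)*lo16(b), and
// mpyh(a,b) = (hi16(a)*lo16(b)) << 16. Summing mpyu(a,b) + mpyh(a,b) +
// mpyh(b,a) yields the low 32 bits of a*b in each vector lane.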
1912 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1913 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1914 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1915 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1917 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1921 // Multiply two v8i16 vectors (pipeline friendly version):
1922 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1923 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1924 // c) Use SELB to select upper and lower halves from the intermediate results
1926 // NOTE: We really want to move the SELECT_MASK earlier so that it can
1927 // dual-issue with the multiplies; the code below manages to do this, even
1928 // if it is a little convoluted.
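// The 0xcccc bit pattern, expanded byte-wise by SELECT_MASK (fsmb), covers
// bytes 0-1 of every 32-bit word, i.e. the upper halfword of each word. The
// SELB below therefore merges the upper halfwords taken from the MPYHH
// product (shifted left 16 bits) with the lower halfwords taken from MPY.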
1930 MachineFunction &MF = DAG.getMachineFunction();
1931 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1932 SDValue Chain = Op.getOperand(0);
1933 SDValue rA = Op.getOperand(0);
1934 SDValue rB = Op.getOperand(1);
1935 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1936 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1938 SDValue FSMBOp =
1939 DAG.getCopyToReg(Chain, FSMBIreg,
1940 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1941 DAG.getConstant(0xcccc, MVT::i16)));
1943 SDValue HHProd =
1944 DAG.getCopyToReg(FSMBOp, HiProdReg,
1945 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1947 SDValue HHProd_v4i32 =
1948 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1949 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1951 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1952 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1953 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1954 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1955 HHProd_v4i32,
1956 DAG.getConstant(16, MVT::i16))),
1957 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1960 // This one is nasty (apologies to Monty Python).
1962 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1963 // is to break it all apart, sign extend, and reassemble the various
1964 // intermediate products.
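// Rough sketch of what follows: only the low 8 bits of each 8x8 product are
// needed, so the bytes are regrouped into halfword lanes, multiplied with the
// 16-bit MPY primitive, shifted back into byte position with VEC_SHL/VEC_SRA,
// and the partial results are recombined via SELECT_MASK, SELB and a final OR.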
1966 SDValue rA = Op.getOperand(0);
1967 SDValue rB = Op.getOperand(1);
1968 SDValue c8 = DAG.getConstant(8, MVT::i32);
1969 SDValue c16 = DAG.getConstant(16, MVT::i32);
1972 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1974 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1976 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1978 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1981 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1982 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1984 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1985 DAG.getConstant(0x2222, MVT::i16));
1987 SDValue LoProdParts =
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1989 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1990 LLProd, LHProd, FSMBmask));
1992 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1995 DAG.getNode(ISD::AND, MVT::v4i32,
1997 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1998 LoProdMask, LoProdMask,
1999 LoProdMask, LoProdMask));
2002 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2003 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2006 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2007 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2010 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2011 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2012 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2015 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2016 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2017 DAG.getNode(SPUISD::VEC_SRA,
2018 MVT::v4i32, rAH, c8)),
2019 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2020 DAG.getNode(SPUISD::VEC_SRA,
2021 MVT::v4i32, rBH, c8)));
2024 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2026 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2030 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2032 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2033 DAG.getNode(ISD::OR, MVT::v4i32,
2041 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2042 MachineFunction &MF = DAG.getMachineFunction();
2043 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2045 SDValue A = Op.getOperand(0);
2046 SDValue B = Op.getOperand(1);
2047 MVT VT = Op.getValueType();
2049 unsigned VRegBR, VRegC;
2051 if (VT == MVT::f32) {
2052 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2053 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2055 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2056 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2058 // TODO: make sure we're feeding FPInterp the right arguments
2059 // Right now: fi B, frest(B)
2062 // (Floating Interpolate (FP Reciprocal Estimate B))
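// The sequence below amounts to a single Newton-Raphson refinement of the
// hardware reciprocal estimate: with r ~= 1/B from frest/fi, it forms
// q0 = A * r and returns q0 + r * (A - B * q0).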
2063 SDValue BRcpl =
2064 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2065 DAG.getNode(SPUISD::FPInterp, VT, B,
2066 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2068 // Computes A * BRcpl and stores in a temporary register
2069 SDValue AxBRcpl =
2070 DAG.getCopyToReg(BRcpl, VRegC,
2071 DAG.getNode(ISD::FMUL, VT, A,
2072 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2073 // What does the Chain variable do? It's magic!
2074 // TODO: set Chain = Op(0).getEntryNode()
2076 return DAG.getNode(ISD::FADD, VT,
2077 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2078 DAG.getNode(ISD::FMUL, VT,
2079 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2080 DAG.getNode(ISD::FSUB, VT, A,
2081 DAG.getNode(ISD::FMUL, VT, B,
2082 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2085 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2086 MVT VT = Op.getValueType();
2087 SDValue N = Op.getOperand(0);
2088 SDValue Elt = Op.getOperand(1);
2089 SDValue ShufMask[16];
2090 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2092 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2094 int EltNo = (int) C->getZExtValue();
2097 if (VT == MVT::i8 && EltNo >= 16)
2098 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2099 else if (VT == MVT::i16 && EltNo >= 8)
2100 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2101 else if (VT == MVT::i32 && EltNo >= 4)
2102 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2103 else if (VT == MVT::i64 && EltNo >= 2)
2104 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2106 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2107 // i32 and i64: Element 0 is the preferred slot
2108 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2111 // Need to generate shuffle mask and extract:
2112 int prefslot_begin = -1, prefslot_end = -1;
2113 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2115 switch (VT.getSimpleVT()) {
2116 default:
2117 assert(false && "Invalid value type!");
2118 case MVT::i8: {
2119 prefslot_begin = prefslot_end = 3;
2120 break;
2121 }
2122 case MVT::i16: {
2123 prefslot_begin = 2; prefslot_end = 3;
2124 break;
2125 }
2126 case MVT::i32: {
2127 prefslot_begin = 0; prefslot_end = 3;
2128 break;
2129 }
2130 case MVT::i64: {
2131 prefslot_begin = 0; prefslot_end = 7;
2132 break;
2133 }
2134 }
2136 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2137 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2139 for (int i = 0; i < 16; ++i) {
2140 // zero fill upper part of preferred slot, don't care about the
2141 // remaining bytes
2142 unsigned int mask_val;
2144 if (i <= prefslot_end) {
2145 mask_val =
2146 ((i < prefslot_begin)
2147 ? 0x80
2148 : elt_byte + (i - prefslot_begin));
2150 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2151 } else
2152 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2153 }
2155 SDValue ShufMaskVec =
2156 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2157 &ShufMask[0],
2158 sizeof(ShufMask) / sizeof(ShufMask[0]));
2160 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2161 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2162 N, N, ShufMaskVec));
2166 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2167 SDValue VecOp = Op.getOperand(0);
2168 SDValue ValOp = Op.getOperand(1);
2169 SDValue IdxOp = Op.getOperand(2);
2170 MVT VT = Op.getValueType();
2172 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2173 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2175 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2176 // Use $2 because it's always 16-byte aligned and it's available:
2177 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2179 SDValue result =
2180 DAG.getNode(SPUISD::SHUFB, VT,
2181 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2182 VecOp,
2183 DAG.getNode(SPUISD::INSERT_MASK, VT,
2184 DAG.getNode(ISD::ADD, PtrVT,
2185 PtrBase,
2186 DAG.getConstant(CN->getZExtValue(),
2187 PtrVT))));
2189 return result;
2190 }
2192 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2194 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2196 assert(Op.getValueType() == MVT::i8);
2199 assert(0 && "Unhandled i8 math operator");
2203 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
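// In other words, an i8 subtract becomes trunc.i8(sub.i16(sext(a), sext(b)));
// the remaining i8 cases below follow the same promote / operate / truncate
// pattern, differing only in how the operands are extended.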
2205 SDValue N1 = Op.getOperand(1);
2206 N0 = (N0.getOpcode() != ISD::Constant
2207 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2208 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2210 N1 = (N1.getOpcode() != ISD::Constant
2211 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2212 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2214 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2215 DAG.getNode(Opc, MVT::i16, N0, N1));
2219 SDValue N1 = Op.getOperand(1);
2221 N0 = (N0.getOpcode() != ISD::Constant
2222 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2223 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2225 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2232 SDValue ExpandArg =
2233 DAG.getNode(ISD::OR, MVT::i16, N0,
2234 DAG.getNode(ISD::SHL, MVT::i16,
2235 N0, DAG.getConstant(8, MVT::i16)));
2236 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2237 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2241 SDValue N1 = Op.getOperand(1);
2243 N0 = (N0.getOpcode() != ISD::Constant
2244 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2245 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2247 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2250 N1 = (N1.getOpcode() != ISD::Constant
2251 ? DAG.getNode(N1Opc, MVT::i16, N1)
2252 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2254 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2255 DAG.getNode(Opc, MVT::i16, N0, N1));
2258 SDValue N1 = Op.getOperand(1);
2260 N0 = (N0.getOpcode() != ISD::Constant
2261 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2262 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2264 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2267 N1 = (N1.getOpcode() != ISD::Constant
2268 ? DAG.getNode(N1Opc, MVT::i16, N1)
2269 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2271 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2272 DAG.getNode(Opc, MVT::i16, N0, N1));
2275 SDValue N1 = Op.getOperand(1);
2277 N0 = (N0.getOpcode() != ISD::Constant
2278 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2279 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2281 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2282 N1 = (N1.getOpcode() != ISD::Constant
2283 ? DAG.getNode(N1Opc, MVT::i16, N1)
2284 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2286 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2287 DAG.getNode(Opc, MVT::i16, N0, N1));
2295 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2297 MVT VT = Op.getValueType();
2298 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2300 SDValue Op0 = Op.getOperand(0);
2303 case ISD::ZERO_EXTEND:
2304 case ISD::SIGN_EXTEND:
2305 case ISD::ANY_EXTEND: {
2306 MVT Op0VT = Op0.getValueType();
2307 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2309 assert(Op0VT == MVT::i32
2310 && "CellSPU: Zero/sign extending something other than i32");
2311 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2313 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2314 ? SPUISD::ROTBYTES_RIGHT_S
2315 : SPUISD::ROTQUAD_RZ_BYTES);
2316 SDValue PromoteScalar =
2317 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2319 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2320 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2321 DAG.getNode(NewOpc, Op0VecVT,
2322 PromoteScalar,
2323 DAG.getConstant(4, MVT::i32))));
2327 // Turn operands into vectors to satisfy type checking (shufb works on
2328 // vectors)
2329 SDValue Op0 =
2330 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2331 SDValue Op1 =
2332 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2333 SmallVector<SDValue, 16> ShufBytes;
2335 // Create the shuffle mask for "rotating" the carry up one register slot
2336 // once the carry is generated.
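// Concretely, 0x04050607 and 0x0c0d0e0f route word 1 into word 0 and word 3
// into word 2 of the carry vector, while 0x80808080 produces zero bytes; the
// carry generated by each low word is thus aligned with its high word before
// ADD_EXTENDED consumes it.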
2337 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2338 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2339 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2340 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2342 SDValue CarryGen =
2343 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2344 SDValue ShiftedCarry =
2345 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2346 CarryGen, CarryGen,
2347 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2348 &ShufBytes[0], ShufBytes.size()));
2350 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2351 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2352 Op0, Op1, ShiftedCarry));
2356 // Turn operands into vectors to satisfy type checking (shufb works on
2357 // vectors)
2358 SDValue Op0 =
2359 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2360 SDValue Op1 =
2361 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2362 SmallVector<SDValue, 16> ShufBytes;
2364 // Create the shuffle mask for "rotating" the borrow up one register slot
2365 // once the borrow is generated.
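// Same idea as the ADD case above, except that the 0xc0c0c0c0 control words
// make shufb produce 0xFF bytes, so the lanes that receive no generated
// borrow are filled with all-ones (treated as "no borrow") before
// SUB_EXTENDED runs.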
2366 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2367 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2368 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2369 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2371 SDValue BorrowGen =
2372 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2373 SDValue ShiftedBorrow =
2374 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2375 BorrowGen, BorrowGen,
2376 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2377 &ShufBytes[0], ShufBytes.size()));
2379 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2380 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2381 Op0, Op1, ShiftedBorrow));
2385 SDValue ShiftAmt = Op.getOperand(1);
2386 MVT ShiftAmtVT = ShiftAmt.getValueType();
2387 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2389 DAG.getNode(SPUISD::SELB, VecVT,
2391 DAG.getConstant(0, VecVT),
2392 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2393 DAG.getConstant(0xff00ULL, MVT::i16)));
2394 SDValue ShiftAmtBytes =
2395 DAG.getNode(ISD::SRL, ShiftAmtVT,
2397 DAG.getConstant(3, ShiftAmtVT));
2398 SDValue ShiftAmtBits =
2399 DAG.getNode(ISD::AND, ShiftAmtVT,
2401 DAG.getConstant(7, ShiftAmtVT));
2403 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2404 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2405 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2406 MaskLower, ShiftAmtBytes),
2411 MVT VT = Op.getValueType();
2412 SDValue ShiftAmt = Op.getOperand(1);
2413 MVT ShiftAmtVT = ShiftAmt.getValueType();
2414 SDValue ShiftAmtBytes =
2415 DAG.getNode(ISD::SRL, ShiftAmtVT,
2417 DAG.getConstant(3, ShiftAmtVT));
2418 SDValue ShiftAmtBits =
2419 DAG.getNode(ISD::AND, ShiftAmtVT,
2421 DAG.getConstant(7, ShiftAmtVT));
2423 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2424 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2425 Op0, ShiftAmtBytes),
2430 // Promote Op0 to vector
2432 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2433 SDValue ShiftAmt = Op.getOperand(1);
2434 MVT ShiftVT = ShiftAmt.getValueType();
2436 // Negate variable shift amounts
2437 if (!isa<ConstantSDNode>(ShiftAmt)) {
2438 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2439 DAG.getConstant(0, ShiftVT), ShiftAmt);
2442 SDValue UpperHalfSign =
2443 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2444 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2445 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2446 Op0, DAG.getConstant(31, MVT::i32))));
2447 SDValue UpperHalfSignMask =
2448 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2449 SDValue UpperLowerMask =
2450 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2451 DAG.getConstant(0xff00, MVT::i16));
2452 SDValue UpperLowerSelect =
2453 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2454 UpperHalfSignMask, Op0, UpperLowerMask);
2455 SDValue RotateLeftBytes =
2456 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2457 UpperLowerSelect, ShiftAmt);
2458 SDValue RotateLeftBits =
2459 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2460 RotateLeftBytes, ShiftAmt);
2462 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2463 RotateLeftBits);
2470 //! Lower byte immediate operations for v16i8 vectors:
2471 static SDValue
2472 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2473 SDValue ConstVec;
2474 SDValue Arg;
2475 MVT VT = Op.getValueType();
2477 ConstVec = Op.getOperand(0);
2478 Arg = Op.getOperand(1);
2479 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2480 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2481 ConstVec = ConstVec.getOperand(0);
2483 ConstVec = Op.getOperand(1);
2484 Arg = Op.getOperand(0);
2485 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2486 ConstVec = ConstVec.getOperand(0);
2491 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2492 uint64_t VectorBits[2];
2493 uint64_t UndefBits[2];
2494 uint64_t SplatBits, SplatUndef;
2495 int SplatSize;
2497 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2498 && isConstantSplat(VectorBits, UndefBits,
2499 VT.getVectorElementType().getSizeInBits(),
2500 SplatBits, SplatUndef, SplatSize)) {
2501 SDValue tcVec[16];
2502 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2503 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2505 // Turn the BUILD_VECTOR into a set of target constants:
2506 for (size_t i = 0; i < tcVecSize; ++i)
2507 tcVec[i] = tc;
2509 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2510 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2513 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2514 // lowered. Return the operation, rather than a null SDValue.
2518 //! Lower i32 multiplication
2519 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2520 unsigned Opc) {
2521 switch (VT.getSimpleVT()) {
2523 cerr << "CellSPU: Unknown LowerMUL value type, got "
2524 << Op.getValueType().getMVTString()
2530 SDValue rA = Op.getOperand(0);
2531 SDValue rB = Op.getOperand(1);
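// Scalar version of the decomposition used in LowerVectorMUL above:
// lo32(a*b) = lo16(a)*lo16(b) + ((hi16(a)*lo16(b) + hi16(b)*lo16(a)) << 16),
// realized here with MPYU and two MPYH nodes.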
2533 return DAG.getNode(ISD::ADD, MVT::i32,
2534 DAG.getNode(ISD::ADD, MVT::i32,
2535 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2536 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2537 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2544 //! Custom lowering for CTPOP (count population)
2546 Custom lowering code that counts the number of ones in the input
2547 operand. SPU has such an instruction, but it counts the number of
2548 ones per byte, which then have to be accumulated.
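// For example, an i32 input yields four per-byte counts from CNTB; the i32
// case below folds them with two shift-and-add rounds (shift by 16, then by
// 8) and masks the sum with 0xff to produce the final population count.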
2550 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2551 MVT VT = Op.getValueType();
2552 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2554 switch (VT.getSimpleVT()) {
2556 assert(false && "Invalid value type!");
2558 SDValue N = Op.getOperand(0);
2559 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2561 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2562 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2564 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2568 MachineFunction &MF = DAG.getMachineFunction();
2569 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2571 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2573 SDValue N = Op.getOperand(0);
2574 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2575 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2576 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2578 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2579 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2581 // CNTB_result becomes the chain to which all of the virtual registers
2582 // CNTB_reg, SUM1_reg become associated:
2583 SDValue CNTB_result =
2584 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2586 SDValue CNTB_rescopy =
2587 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2589 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2591 return DAG.getNode(ISD::AND, MVT::i16,
2592 DAG.getNode(ISD::ADD, MVT::i16,
2593 DAG.getNode(ISD::SRL, MVT::i16,
2600 MachineFunction &MF = DAG.getMachineFunction();
2601 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2603 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2604 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2606 SDValue N = Op.getOperand(0);
2607 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2608 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2609 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2610 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2612 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2613 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2615 // CNTB_result becomes the chain to which all of the virtual registers
2616 // CNTB_reg, SUM1_reg become associated:
2617 SDValue CNTB_result =
2618 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2620 SDValue CNTB_rescopy =
2621 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2624 DAG.getNode(ISD::SRL, MVT::i32,
2625 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2628 DAG.getNode(ISD::ADD, MVT::i32,
2629 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2631 SDValue Sum1_rescopy =
2632 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2635 DAG.getNode(ISD::SRL, MVT::i32,
2636 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2639 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2640 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2642 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2652 /// LowerOperation - Provide custom lowering hooks for some operations.
2655 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2657 unsigned Opc = (unsigned) Op.getOpcode();
2658 MVT VT = Op.getValueType();
2662 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2663 cerr << "Op.getOpcode() = " << Opc << "\n";
2664 cerr << "*Op.getNode():\n";
2665 Op.getNode()->dump();
2671 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2673 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2674 case ISD::ConstantPool:
2675 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2676 case ISD::GlobalAddress:
2677 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2678 case ISD::JumpTable:
2679 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2681 return LowerConstant(Op, DAG);
2682 case ISD::ConstantFP:
2683 return LowerConstantFP(Op, DAG);
2685 return LowerBRCOND(Op, DAG);
2686 case ISD::FORMAL_ARGUMENTS:
2687 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2689 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2691 return LowerRET(Op, DAG, getTargetMachine());
2694 // i8, i64 math ops:
2695 case ISD::ZERO_EXTEND:
2696 case ISD::SIGN_EXTEND:
2697 case ISD::ANY_EXTEND:
2706 return LowerI8Math(Op, DAG, Opc);
2707 else if (VT == MVT::i64)
2708 return LowerI64Math(Op, DAG, Opc);
2712 // Vector-related lowering.
2713 case ISD::BUILD_VECTOR:
2714 return LowerBUILD_VECTOR(Op, DAG);
2715 case ISD::SCALAR_TO_VECTOR:
2716 return LowerSCALAR_TO_VECTOR(Op, DAG);
2717 case ISD::VECTOR_SHUFFLE:
2718 return LowerVECTOR_SHUFFLE(Op, DAG);
2719 case ISD::EXTRACT_VECTOR_ELT:
2720 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2721 case ISD::INSERT_VECTOR_ELT:
2722 return LowerINSERT_VECTOR_ELT(Op, DAG);
2724 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2728 return LowerByteImmed(Op, DAG);
2730 // Vector and i8 multiply:
2733 return LowerVectorMUL(Op, DAG);
2734 else if (VT == MVT::i8)
2735 return LowerI8Math(Op, DAG, Opc);
2737 return LowerMUL(Op, DAG, VT, Opc);
2740 if (VT == MVT::f32 || VT == MVT::v4f32)
2741 return LowerFDIVf32(Op, DAG);
2742 // else if (Op.getValueType() == MVT::f64)
2743 // return LowerFDIVf64(Op, DAG);
2745 assert(0 && "Calling FDIV on unsupported MVT");
2748 return LowerCTPOP(Op, DAG);
2754 //===----------------------------------------------------------------------===//
2755 // Target Optimization Hooks
2756 //===----------------------------------------------------------------------===//
2759 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2762 TargetMachine &TM = getTargetMachine();
2764 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2765 SelectionDAG &DAG = DCI.DAG;
2766 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2767 SDValue Result; // Initially, NULL result
2769 switch (N->getOpcode()) {
2772 SDValue Op1 = N->getOperand(1);
2774 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2775 SDValue Op01 = Op0.getOperand(1);
2776 if (Op01.getOpcode() == ISD::Constant
2777 || Op01.getOpcode() == ISD::TargetConstant) {
2778 // (add <const>, (SPUindirect <arg>, <const>)) ->
2779 // (SPUindirect <arg>, <const + const>)
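// e.g. (add (SPUindirect <arg>, 16), 4) folds to (SPUindirect <arg>, 20).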
2780 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2781 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2782 SDValue combinedConst =
2783 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2784 Op0.getValueType());
2786 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2787 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2788 DEBUG(cerr << "With: (SPUindirect <arg>, "
2789 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2790 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2791 Op0.getOperand(0), combinedConst);
2793 } else if (isa<ConstantSDNode>(Op0)
2794 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2795 SDValue Op11 = Op1.getOperand(1);
2796 if (Op11.getOpcode() == ISD::Constant
2797 || Op11.getOpcode() == ISD::TargetConstant) {
2798 // (add (SPUindirect <arg>, <const>), <const>) ->
2799 // (SPUindirect <arg>, <const + const>)
2800 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2801 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2802 SDValue combinedConst =
2803 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2804 Op0.getValueType());
2806 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2807 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2808 DEBUG(cerr << "With: (SPUindirect <arg>, "
2809 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2811 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2812 Op1.getOperand(0), combinedConst);
2817 case ISD::SIGN_EXTEND:
2818 case ISD::ZERO_EXTEND:
2819 case ISD::ANY_EXTEND: {
2820 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2821 N->getValueType(0) == Op0.getValueType()) {
2822 // (any_extend (SPUextract_elt0 <arg>)) ->
2823 // (SPUextract_elt0 <arg>)
2824 // Types must match, however...
2825 DEBUG(cerr << "Replace: ");
2826 DEBUG(N->dump(&DAG));
2827 DEBUG(cerr << "\nWith: ");
2828 DEBUG(Op0.getNode()->dump(&DAG));
2829 DEBUG(cerr << "\n");
2835 case SPUISD::IndirectAddr: {
2836 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2837 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2838 if (CN->getZExtValue() == 0) {
2839 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2840 // (SPUaform <addr>, 0)
2842 DEBUG(cerr << "Replace: ");
2843 DEBUG(N->dump(&DAG));
2844 DEBUG(cerr << "\nWith: ");
2845 DEBUG(Op0.getNode()->dump(&DAG));
2846 DEBUG(cerr << "\n");
2853 case SPUISD::SHLQUAD_L_BITS:
2854 case SPUISD::SHLQUAD_L_BYTES:
2855 case SPUISD::VEC_SHL:
2856 case SPUISD::VEC_SRL:
2857 case SPUISD::VEC_SRA:
2858 case SPUISD::ROTQUAD_RZ_BYTES:
2859 case SPUISD::ROTQUAD_RZ_BITS: {
2860 SDValue Op1 = N->getOperand(1);
2862 if (isa<ConstantSDNode>(Op1)) {
2863 // Kill degenerate vector shifts:
2864 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2866 if (CN->getZExtValue() == 0) {
2872 case SPUISD::PROMOTE_SCALAR: {
2873 switch (Op0.getOpcode()) {
2876 case ISD::ANY_EXTEND:
2877 case ISD::ZERO_EXTEND:
2878 case ISD::SIGN_EXTEND: {
2879 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2881 // but only if the SPUpromote_scalar and <arg> types match.
2882 SDValue Op00 = Op0.getOperand(0);
2883 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2884 SDValue Op000 = Op00.getOperand(0);
2885 if (Op000.getValueType() == N->getValueType(0)) {
2891 case SPUISD::EXTRACT_ELT0: {
2892 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2894 Result = Op0.getOperand(0);
2901 // Otherwise, return unchanged.
2903 if (Result.getNode()) {
2904 DEBUG(cerr << "\nReplace.SPU: ");
2905 DEBUG(N->dump(&DAG));
2906 DEBUG(cerr << "\nWith: ");
2907 DEBUG(Result.getNode()->dump(&DAG));
2908 DEBUG(cerr << "\n");
2915 //===----------------------------------------------------------------------===//
2916 // Inline Assembly Support
2917 //===----------------------------------------------------------------------===//
2919 /// getConstraintType - Given a constraint letter, return the type of
2920 /// constraint it is for this target.
2921 SPUTargetLowering::ConstraintType
2922 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2923 if (ConstraintLetter.size() == 1) {
2924 switch (ConstraintLetter[0]) {
2931 return C_RegisterClass;
2934 return TargetLowering::getConstraintType(ConstraintLetter);
2937 std::pair<unsigned, const TargetRegisterClass*>
2938 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2941 if (Constraint.size() == 1) {
2942 // GCC RS6000 Constraint Letters
2943 switch (Constraint[0]) {
2947 return std::make_pair(0U, SPU::R64CRegisterClass);
2948 return std::make_pair(0U, SPU::R32CRegisterClass);
2951 return std::make_pair(0U, SPU::R32FPRegisterClass);
2952 else if (VT == MVT::f64)
2953 return std::make_pair(0U, SPU::R64FPRegisterClass);
2956 return std::make_pair(0U, SPU::GPRCRegisterClass);
2960 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2963 //! Compute used/known bits for a SPU operand
2965 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2969 const SelectionDAG &DAG,
2970 unsigned Depth ) const {
2972 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2975 switch (Op.getOpcode()) {
2977 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2987 case SPUISD::PROMOTE_SCALAR: {
2988 SDValue Op0 = Op.getOperand(0);
2989 MVT Op0VT = Op0.getValueType();
2990 unsigned Op0VTBits = Op0VT.getSizeInBits();
2991 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2992 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2993 KnownOne |= APInt(Op0VTBits, InMask, false);
2997 case SPUISD::LDRESULT:
2998 case SPUISD::EXTRACT_ELT0:
2999 case SPUISD::EXTRACT_ELT0_CHAINED: {
3000 MVT OpVT = Op.getValueType();
3001 unsigned OpVTBits = OpVT.getSizeInBits();
3002 uint64_t InMask = OpVT.getIntegerVTBitMask();
3003 KnownZero |= APInt(OpVTBits, ~InMask, false);
3004 KnownOne |= APInt(OpVTBits, InMask, false);
3009 case EXTRACT_I1_ZEXT:
3010 case EXTRACT_I1_SEXT:
3011 case EXTRACT_I8_ZEXT:
3012 case EXTRACT_I8_SEXT:
3017 case SPUISD::SHLQUAD_L_BITS:
3018 case SPUISD::SHLQUAD_L_BYTES:
3019 case SPUISD::VEC_SHL:
3020 case SPUISD::VEC_SRL:
3021 case SPUISD::VEC_SRA:
3022 case SPUISD::VEC_ROTL:
3023 case SPUISD::VEC_ROTR:
3024 case SPUISD::ROTQUAD_RZ_BYTES:
3025 case SPUISD::ROTQUAD_RZ_BITS:
3026 case SPUISD::ROTBYTES_RIGHT_S:
3027 case SPUISD::ROTBYTES_LEFT:
3028 case SPUISD::ROTBYTES_LEFT_CHAINED:
3029 case SPUISD::SELECT_MASK:
3031 case SPUISD::FPInterp:
3032 case SPUISD::FPRecipEst:
3033 case SPUISD::SEXT32TO64:
3038 // LowerAsmOperandForConstraint
3040 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3041 char ConstraintLetter,
3043 std::vector<SDValue> &Ops,
3044 SelectionDAG &DAG) const {
3045 // Default, for the time being, to the base class handler
3046 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3050 /// isLegalAddressImmediate - Return true if the integer value can be used
3051 /// as the offset of the target addressing mode.
3052 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3053 const Type *Ty) const {
3054 // SPU's local store is 256K, so valid address immediates fit in 18 bits:
3055 return (V > -(1 << 18) && V < (1 << 18) - 1);
3058 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3059 return false;
3060 }
3062 bool
3063 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3064 // The SPU target isn't yet aware of offsets.
3065 return false;
3066 }