1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
41 struct valtype_map_s {
const MVT valtype;
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form address.
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170 // Custom lower BRCOND for i1 and i8 so that the condition is "promoted" to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182 // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FLOG , MVT::f64, Expand);
193 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
194 setOperationAction(ISD::FLOG10,MVT::f64, Expand);
195 setOperationAction(ISD::FEXP , MVT::f64, Expand);
196 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
197 setOperationAction(ISD::FSIN , MVT::f32, Expand);
198 setOperationAction(ISD::FCOS , MVT::f32, Expand);
199 setOperationAction(ISD::FREM , MVT::f32, Expand);
200 setOperationAction(ISD::FLOG , MVT::f32, Expand);
201 setOperationAction(ISD::FLOG2, MVT::f32, Expand);
202 setOperationAction(ISD::FLOG10,MVT::f32, Expand);
203 setOperationAction(ISD::FEXP , MVT::f32, Expand);
204 setOperationAction(ISD::FEXP2, MVT::f32, Expand);
206 // SPU has no hardware square root instruction, so expand FSQRT:
207 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
208 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
210 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
211 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
213 // SPU can do rotate right and left, so legalize it... but customize for i8
214 // because instructions don't exist.
216 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
218 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
219 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
220 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
222 setOperationAction(ISD::ROTL, MVT::i32, Legal);
223 setOperationAction(ISD::ROTL, MVT::i16, Legal);
224 setOperationAction(ISD::ROTL, MVT::i8, Custom);
225 // SPU has no native version of shift left/right for i8
226 setOperationAction(ISD::SHL, MVT::i8, Custom);
227 setOperationAction(ISD::SRL, MVT::i8, Custom);
228 setOperationAction(ISD::SRA, MVT::i8, Custom);
229 // And SPU needs custom lowering for shift left/right for i64
230 setOperationAction(ISD::SHL, MVT::i64, Custom);
231 setOperationAction(ISD::SRL, MVT::i64, Custom);
232 setOperationAction(ISD::SRA, MVT::i64, Custom);
234 // Custom lower i8, i32 and i64 multiplications
235 setOperationAction(ISD::MUL, MVT::i8, Custom);
236 setOperationAction(ISD::MUL, MVT::i32, Custom);
237 setOperationAction(ISD::MUL, MVT::i64, Custom);
239 // Need to custom handle (some) common i8, i64 math ops
240 setOperationAction(ISD::ADD, MVT::i64, Custom);
241 setOperationAction(ISD::SUB, MVT::i8, Custom);
242 setOperationAction(ISD::SUB, MVT::i64, Custom);
244 // SPU does not have BSWAP, but it does support CTLZ for i32.
245 // CTPOP has to be custom lowered.
246 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
247 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
249 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
250 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
251 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
252 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
254 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
255 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
257 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
259 // SPU has a version of select that implements (a&~c)|(b&c), just like
260 // select ought to work:
261 setOperationAction(ISD::SELECT, MVT::i1, Promote);
262 setOperationAction(ISD::SELECT, MVT::i8, Legal);
263 setOperationAction(ISD::SELECT, MVT::i16, Legal);
264 setOperationAction(ISD::SELECT, MVT::i32, Legal);
265 setOperationAction(ISD::SELECT, MVT::i64, Expand);
267 setOperationAction(ISD::SETCC, MVT::i1, Promote);
268 setOperationAction(ISD::SETCC, MVT::i8, Legal);
269 setOperationAction(ISD::SETCC, MVT::i16, Legal);
270 setOperationAction(ISD::SETCC, MVT::i32, Legal);
271 setOperationAction(ISD::SETCC, MVT::i64, Expand);
273 // Zero extension and sign extension for i64 have to be custom lowered:
275 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
276 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
277 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
279 // SPU has a legal FP -> signed INT instruction
280 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
281 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
283 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
285 // FDIV on SPU requires custom lowering
286 setOperationAction(ISD::FDIV, MVT::f32, Custom);
287 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
289 // SPU has [U|S]INT_TO_FP
290 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
291 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
292 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
293 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
294 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
295 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
296 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
297 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
299 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
300 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
301 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
302 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
304 // We cannot sextinreg(i1). Expand to shifts.
305 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
307 // Support label based line numbers.
308 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
309 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
311 // We want to legalize GlobalAddress and ConstantPool nodes into the
312 // appropriate instructions to materialize the address.
313 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
315 MVT VT = (MVT::SimpleValueType)sctype;
317 setOperationAction(ISD::GlobalAddress, VT, Custom);
318 setOperationAction(ISD::ConstantPool, VT, Custom);
319 setOperationAction(ISD::JumpTable, VT, Custom);
322 // RET must be custom lowered, to meet ABI requirements
323 setOperationAction(ISD::RET, MVT::Other, Custom);
325 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
326 setOperationAction(ISD::VASTART , MVT::Other, Custom);
328 // Use the default implementation.
329 setOperationAction(ISD::VAARG , MVT::Other, Expand);
330 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
331 setOperationAction(ISD::VAEND , MVT::Other, Expand);
332 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
333 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
334 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
335 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
337 // Cell SPU has instructions for converting between i64 and fp.
338 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
339 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
341 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
342 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
344 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
345 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
347 // First set operation action for all vector types to expand. Then we
348 // will selectively turn on ones that can be effectively codegen'd.
349 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
350 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
351 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
352 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
353 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
354 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
356 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
357 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
358 MVT VT = (MVT::SimpleValueType)i;
360 // add/sub are legal for all supported vector VT's.
361 setOperationAction(ISD::ADD , VT, Legal);
362 setOperationAction(ISD::SUB , VT, Legal);
363 // mul has to be custom lowered.
364 setOperationAction(ISD::MUL , VT, Custom);
366 setOperationAction(ISD::AND , VT, Legal);
367 setOperationAction(ISD::OR , VT, Legal);
368 setOperationAction(ISD::XOR , VT, Legal);
369 setOperationAction(ISD::LOAD , VT, Legal);
370 setOperationAction(ISD::SELECT, VT, Legal);
371 setOperationAction(ISD::STORE, VT, Legal);
373 // These operations need to be expanded:
374 setOperationAction(ISD::SDIV, VT, Expand);
375 setOperationAction(ISD::SREM, VT, Expand);
376 setOperationAction(ISD::UDIV, VT, Expand);
377 setOperationAction(ISD::UREM, VT, Expand);
378 setOperationAction(ISD::FDIV, VT, Custom);
380 // Custom lower build_vector, constant pool spills, insert and
381 // extract vector elements:
382 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
383 setOperationAction(ISD::ConstantPool, VT, Custom);
384 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
385 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
386 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
387 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
390 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
391 setOperationAction(ISD::AND, MVT::v16i8, Custom);
392 setOperationAction(ISD::OR, MVT::v16i8, Custom);
393 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
394 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
396 setShiftAmountType(MVT::i32);
397 setSetCCResultContents(ZeroOrOneSetCCResult);
399 setStackPointerRegisterToSaveRestore(SPU::R1);
401 // We have target-specific dag combine patterns for the following nodes:
402 setTargetDAGCombine(ISD::ADD);
403 setTargetDAGCombine(ISD::ZERO_EXTEND);
404 setTargetDAGCombine(ISD::SIGN_EXTEND);
405 setTargetDAGCombine(ISD::ANY_EXTEND);
407 computeRegisterProperties();
411 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
413 if (node_names.empty()) {
414 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
415 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
416 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
417 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
418 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
419 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
420 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
421 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
422 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
423 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
424 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
425 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
426 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
427 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
428 = "SPUISD::EXTRACT_ELT0_CHAINED";
429 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
430 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
431 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
432 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
433 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
434 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
435 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
436 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
437 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
438 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
439 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
440 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
441 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
442 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
443 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
444 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
445 "SPUISD::ROTQUAD_RZ_BYTES";
446 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
447 "SPUISD::ROTQUAD_RZ_BITS";
448 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
449 "SPUISD::ROTBYTES_RIGHT_S";
450 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
451 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
452 "SPUISD::ROTBYTES_LEFT_CHAINED";
453 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
454 "SPUISD::ROTBYTES_LEFT_BITS";
455 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
456 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
457 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
458 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
459 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
460 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
461 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
462 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
463 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
466 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
468 return ((i != node_names.end()) ? i->second : 0);
471 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
472 MVT VT = Op.getValueType();
479 //===----------------------------------------------------------------------===//
480 // Calling convention code:
481 //===----------------------------------------------------------------------===//
483 #include "SPUGenCallingConv.inc"
485 //===----------------------------------------------------------------------===//
486 // LowerOperation implementation
487 //===----------------------------------------------------------------------===//
489 /// Aligned load common code for CellSPU
491 \param[in] Op The SelectionDAG load or store operand
492 \param[in] DAG The selection DAG
493 \param[in] ST CellSPU subtarget information structure
494 \param[in,out] alignment Caller initializes this to the load or store node's
495 value from getAlignment(), may be updated while generating the aligned load
496 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
497 offset (divisible by 16, modulo 16 == 0)
498 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
499 offset of the preferred slot (modulo 16 != 0)
500 \param[in,out] VT Caller initializes this value type to the load or store
501 node's loaded or stored value type; may be updated if an i1-extended load or store is encountered.
503 \param[out] was16aligned true if the base pointer had 16-byte alignment,
504 otherwise false. Can help to determine if the chunk needs to be rotated.
506 Both load and store lowering fetch a block of data aligned on a 16-byte
507 boundary; this is the common aligned-load code shared between the two.
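As an illustrative walk-through (assuming an i32, whose preferred slot starts
at byte 0 of the quadword): a base pointer of the form (add basereg, 22)
yields alignOffs = 22, so the quadword is fetched from basereg + 16
(alignOffs & ~0xf) and prefSlotOffs becomes (22 & 0xf) - 0 = 6, the byte
rotation later needed to move the value into its preferred slot.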
510 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
LSBaseSDNode *LSN,
512 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
513 MVT &VT, bool &was16aligned)
515 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
516 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
517 SDValue basePtr = LSN->getBasePtr();
518 SDValue chain = LSN->getChain();
520 if (basePtr.getOpcode() == ISD::ADD) {
521 SDValue Op1 = basePtr.getNode()->getOperand(1);
523 if (Op1.getOpcode() == ISD::Constant
524 || Op1.getOpcode() == ISD::TargetConstant) {
525 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
527 alignOffs = (int) CN->getZExtValue();
528 prefSlotOffs = (int) (alignOffs & 0xf);
530 // Adjust the rotation amount to ensure that the final result ends up in
531 // the preferred slot:
532 prefSlotOffs -= vtm->prefslot_byte;
533 basePtr = basePtr.getOperand(0);
535 // Loading from memory, can we adjust alignment?
536 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
537 SDValue APtr = basePtr.getOperand(0);
538 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
539 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
540 alignment = GSDN->getGlobal()->getAlignment();
545 prefSlotOffs = -vtm->prefslot_byte;
547 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
548 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
549 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
550 prefSlotOffs = (int) (alignOffs & 0xf);
551 prefSlotOffs -= vtm->prefslot_byte;
552 basePtr = DAG.getRegister(SPU::R1, VT);
555 prefSlotOffs = -vtm->prefslot_byte;
558 if (alignment == 16) {
559 // Realign the base pointer as a D-Form address:
560 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
561 basePtr = DAG.getNode(ISD::ADD, PtrVT,
563 DAG.getConstant((alignOffs & ~0xf), PtrVT));
566 // Emit the vector load:
568 return DAG.getLoad(MVT::v16i8, chain, basePtr,
569 LSN->getSrcValue(), LSN->getSrcValueOffset(),
570 LSN->isVolatile(), 16);
573 // Unaligned load or we're using the "large memory" model, which means that
574 // we have to be very pessimistic:
575 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
576 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
577 DAG.getConstant(0, PtrVT));
581 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
582 DAG.getConstant((alignOffs & ~0xf), PtrVT));
583 was16aligned = false;
584 return DAG.getLoad(MVT::v16i8, chain, basePtr,
585 LSN->getSrcValue(), LSN->getSrcValueOffset(),
586 LSN->isVolatile(), 16);
589 /// Custom lower loads for CellSPU
591 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
592 within a 16-byte block, we have to rotate to extract the requested element.
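For example (a sketch of the common, 16-byte-aligned case): an i32 load from
an address with (addr & 0xf) == 4 becomes a v16i8 load of the containing
quadword followed by a left rotation of 4 bytes, so the requested word lands
in the preferred slot before being extracted back to a scalar.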
595 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
596 LoadSDNode *LN = cast<LoadSDNode>(Op);
597 SDValue the_chain = LN->getChain();
598 MVT VT = LN->getMemoryVT();
599 MVT OpVT = Op.getNode()->getValueType(0);
600 ISD::LoadExtType ExtType = LN->getExtensionType();
601 unsigned alignment = LN->getAlignment();
604 switch (LN->getAddressingMode()) {
605 case ISD::UNINDEXED: {
609 AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
611 if (result.getNode() == 0)
614 the_chain = result.getValue(1);
615 // Rotate the chunk if necessary
618 if (rotamt != 0 || !was16aligned) {
619 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
624 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
626 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
627 LoadSDNode *LN1 = cast<LoadSDNode>(result);
628 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
629 DAG.getConstant(rotamt, PtrVT));
632 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
633 the_chain = result.getValue(1);
636 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
638 MVT vecVT = MVT::v16i8;
640 // Convert the loaded v16i8 vector to the appropriate vector type
641 // specified by the operand:
644 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
646 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
649 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
650 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
651 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
652 the_chain = result.getValue(1);
654 // Handle the sign and zero-extending loads for i1 and i8:
657 if (ExtType == ISD::SEXTLOAD) {
658 NewOpC = (OpVT == MVT::i1
659 ? SPUISD::EXTRACT_I1_SEXT
660 : SPUISD::EXTRACT_I8_SEXT);
662 assert(ExtType == ISD::ZEXTLOAD);
663 NewOpC = (OpVT == MVT::i1
664 ? SPUISD::EXTRACT_I1_ZEXT
665 : SPUISD::EXTRACT_I8_ZEXT);
668 result = DAG.getNode(NewOpC, OpVT, result);
671 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
672 SDValue retops[2] = {
677 result = DAG.getNode(SPUISD::LDRESULT, retvts,
678 retops, sizeof(retops) / sizeof(retops[0]));
685 case ISD::LAST_INDEXED_MODE:
686 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
688 cerr << (unsigned) LN->getAddressingMode() << "\n";
696 /// Custom lower stores for CellSPU
698 All CellSPU stores are aligned to 16-byte boundaries, so for elements
699 within a 16-byte block, we have to generate a shuffle to insert the
700 requested element into its place, then store the resulting block.
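For example (a sketch): storing an i32 to an address with (addr & 0xf) == 8
first loads the containing quadword, builds an insertion control word for
byte offset 8 (SPUISD::INSERT_MASK, later selected to one of the C*D/C*X
forms), shuffles the new word into that slot with SPUISD::SHUFB, and finally
stores the whole 16-byte block back.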
703 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
704 StoreSDNode *SN = cast<StoreSDNode>(Op);
705 SDValue Value = SN->getValue();
706 MVT VT = Value.getValueType();
707 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
708 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
709 unsigned alignment = SN->getAlignment();
711 switch (SN->getAddressingMode()) {
712 case ISD::UNINDEXED: {
713 int chunk_offset, slot_offset;
716 // The vector type we really want to load from the 16-byte chunk, except
717 // in the case of MVT::i1, which has to be v16i8.
718 MVT vecVT, stVecVT = MVT::v16i8;
721 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
722 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
724 SDValue alignLoadVec =
725 AlignedLoad(Op, DAG, ST, SN, alignment,
726 chunk_offset, slot_offset, VT, was16aligned);
728 if (alignLoadVec.getNode() == 0)
731 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
732 SDValue basePtr = LN->getBasePtr();
733 SDValue the_chain = alignLoadVec.getValue(1);
734 SDValue theValue = SN->getValue();
738 && (theValue.getOpcode() == ISD::AssertZext
739 || theValue.getOpcode() == ISD::AssertSext)) {
740 // Drill down and get the value for zero- and sign-extended quantities.
742 theValue = theValue.getOperand(0);
747 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
748 SDValue insertEltPtr;
751 // If the base pointer is already a D-form address, then just create
752 // a new D-form address with a slot offset and the original base pointer.
753 // Otherwise generate a D-form address with the slot offset relative
754 // to the stack pointer, which is always aligned.
755 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
756 DEBUG(basePtr.getNode()->dump(&DAG));
759 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
760 (basePtr.getOpcode() == ISD::ADD
761 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
762 insertEltPtr = basePtr;
764 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
767 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
768 result = DAG.getNode(SPUISD::SHUFB, vecVT,
769 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
771 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
773 result = DAG.getStore(the_chain, result, basePtr,
774 LN->getSrcValue(), LN->getSrcValueOffset(),
775 LN->isVolatile(), LN->getAlignment());
784 case ISD::LAST_INDEXED_MODE:
785 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
787 cerr << (unsigned) SN->getAddressingMode() << "\n";
795 /// Generate the address of a constant pool entry.
797 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
798 MVT PtrVT = Op.getValueType();
799 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
800 Constant *C = CP->getConstVal();
801 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
802 SDValue Zero = DAG.getConstant(0, PtrVT);
803 const TargetMachine &TM = DAG.getTarget();
805 if (TM.getRelocationModel() == Reloc::Static) {
806 if (!ST->usingLargeMem()) {
807 // Just return the SDValue with the constant pool address in it.
808 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
810 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
811 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
812 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
817 "LowerConstantPool: Relocation model other than static"
823 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
824 MVT PtrVT = Op.getValueType();
825 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
826 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
827 SDValue Zero = DAG.getConstant(0, PtrVT);
828 const TargetMachine &TM = DAG.getTarget();
830 if (TM.getRelocationModel() == Reloc::Static) {
831 if (!ST->usingLargeMem()) {
832 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
834 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
835 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
836 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
841 "LowerJumpTable: Relocation model other than static not supported.");
846 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
847 MVT PtrVT = Op.getValueType();
848 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
849 GlobalValue *GV = GSDN->getGlobal();
850 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
851 const TargetMachine &TM = DAG.getTarget();
852 SDValue Zero = DAG.getConstant(0, PtrVT);
854 if (TM.getRelocationModel() == Reloc::Static) {
855 if (!ST->usingLargeMem()) {
856 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
858 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
859 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
860 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
863 cerr << "LowerGlobalAddress: Relocation model other than static not "
872 //! Custom lower i64 integer constants
874 This code performs the juggling needed to load a 64-bit constant into a
875 register.
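Schematically, the generated DAG is

  (i64 C)  ->  SPUISD::EXTRACT_ELT0 (BUILD_VECTOR v2i64 C, C)

i.e. the constant is splatted into a vector register and the scalar is then
read back out of the preferred slot.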
878 LowerConstant(SDValue Op, SelectionDAG &DAG) {
879 MVT VT = Op.getValueType();
880 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
882 if (VT == MVT::i64) {
883 SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
884 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
885 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
887 cerr << "LowerConstant: unhandled constant type "
897 //! Custom lower double precision floating point constants
899 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
900 MVT VT = Op.getValueType();
901 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
904 "LowerConstantFP: Node is not ConstantFPSDNode");
906 if (VT == MVT::f64) {
907 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
908 return DAG.getNode(ISD::BIT_CONVERT, VT,
909 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
915 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
917 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
919 SDValue Cond = Op.getOperand(1);
920 MVT CondVT = Cond.getValueType();
923 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
924 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
925 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
927 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
930 return SDValue(); // Unchanged
934 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
936 MachineFunction &MF = DAG.getMachineFunction();
937 MachineFrameInfo *MFI = MF.getFrameInfo();
938 MachineRegisterInfo &RegInfo = MF.getRegInfo();
939 SmallVector<SDValue, 8> ArgValues;
940 SDValue Root = Op.getOperand(0);
941 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
943 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
944 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
946 unsigned ArgOffset = SPUFrameInfo::minStackSize();
947 unsigned ArgRegIdx = 0;
948 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
950 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
952 // Add DAG nodes to load the arguments or copy them out of registers.
953 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
954 ArgNo != e; ++ArgNo) {
956 bool needsLoad = false;
957 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
958 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
960 switch (ObjectVT.getSimpleVT()) {
962 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
963 << ObjectVT.getMVTString()
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1008 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1009 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
1010 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1011 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
1018 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1019 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1020 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1021 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1033 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1034 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1035 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1036 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1044 // We need to load the argument to a virtual register if we determined above
1045 // that we ran out of physical registers of the appropriate type
1047 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1048 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1049 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1050 ArgOffset += StackSlotSize;
1053 ArgValues.push_back(ArgVal);
1056 // If the function takes a variable number of arguments, make a frame index for
1057 // the start of the first vararg value... for expansion of llvm.va_start.
1059 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1061 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1062 // If this function is vararg, store any remaining integer argument regs to
1063 // their spots on the stack so that they may be loaded by dereferencing the
1064 // result of va_next.
1065 SmallVector<SDValue, 8> MemOps;
1066 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1067 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1068 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1069 SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1070 SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1071 MemOps.push_back(Store);
1072 // Increment the address by four for the next argument to store
1073 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1074 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1076 if (!MemOps.empty())
1077 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1080 ArgValues.push_back(Root);
1082 // Return the new list of results.
1083 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1087 /// isLSAAddress - Return the immediate to use if the specified
1088 /// value is representable as an LSA address.
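/// For example, an absolute address of 0x100 (word aligned and within the
/// signed 18-bit range) yields the immediate 0x40 (0x100 >> 2); a misaligned
/// or out-of-range address returns null instead.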
1089 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1090 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1093 int Addr = C->getZExtValue();
1094 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1095 (Addr << 14 >> 14) != Addr)
1096 return 0; // Top 14 bits have to be sext of immediate.
1098 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1103 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1104 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
1105 SDValue Chain = TheCall->getChain();
1107 bool isVarArg = TheCall->isVarArg();
1108 bool isTailCall = TheCall->isTailCall();
1110 SDValue Callee = TheCall->getCallee();
1111 unsigned NumOps = TheCall->getNumArgs();
1112 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1113 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1114 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1116 // Handy pointer type
1117 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1119 // Accumulate how many bytes are to be pushed on the stack, including the
1120 // linkage area, and parameter passing area. According to the SPU ABI,
1121 // we minimally need space for [LR] and [SP]
1122 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1124 // Set up a copy of the stack pointer for use in loading and storing any
1125 // arguments that may not fit in the registers available for argument passing.
1127 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1129 // Figure out which arguments are going to go in registers, and which in memory.
1131 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1132 unsigned ArgRegIdx = 0;
1134 // Keep track of registers passing arguments
1135 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1136 // And the arguments passed on the stack
1137 SmallVector<SDValue, 8> MemOpChains;
1139 for (unsigned i = 0; i != NumOps; ++i) {
1140 SDValue Arg = TheCall->getArg(i);
1142 // PtrOff will be used to store the current argument to the stack if a
1143 // register cannot be found for it.
1144 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1145 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1147 switch (Arg.getValueType().getSimpleVT()) {
1148 default: assert(0 && "Unexpected ValueType for argument!");
1152 if (ArgRegIdx != NumArgRegs) {
1153 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1155 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1156 ArgOffset += StackSlotSize;
1161 if (ArgRegIdx != NumArgRegs) {
1162 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1164 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1165 ArgOffset += StackSlotSize;
1172 if (ArgRegIdx != NumArgRegs) {
1173 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1175 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1176 ArgOffset += StackSlotSize;
1182 // Update number of stack bytes actually used, insert a call sequence start
1183 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1184 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1186 if (!MemOpChains.empty()) {
1187 // Adjust the stack pointer for the stack arguments.
1188 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1189 &MemOpChains[0], MemOpChains.size());
1192 // Build a sequence of copy-to-reg nodes chained together with token chain
1193 // and flag operands which copy the outgoing args into the appropriate regs.
1195 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1196 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1198 InFlag = Chain.getValue(1);
1201 SmallVector<SDValue, 8> Ops;
1202 unsigned CallOpc = SPUISD::CALL;
1204 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1205 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1206 // node so that legalize doesn't hack it.
1207 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1208 GlobalValue *GV = G->getGlobal();
1209 MVT CalleeVT = Callee.getValueType();
1210 SDValue Zero = DAG.getConstant(0, PtrVT);
1211 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1213 if (!ST->usingLargeMem()) {
1214 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1215 // style calls; otherwise, external symbols are BRASL calls. This assumes
1216 // that declared/defined symbols are in the same compilation unit and can
1217 // be reached through PC-relative jumps.
1220 // This may be an unsafe assumption for JIT and really large compilation units.
1222 if (GV->isDeclaration()) {
1223 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1225 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1228 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1230 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1232 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1233 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1234 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1235 // If this is an absolute destination address that appears to be a legal
1236 // local store address, use the munged value.
1237 Callee = SDValue(Dest, 0);
1240 Ops.push_back(Chain);
1241 Ops.push_back(Callee);
1243 // Add argument registers to the end of the list so that they are known live into the call.
1245 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1246 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1247 RegsToPass[i].second.getValueType()));
1249 if (InFlag.getNode())
1250 Ops.push_back(InFlag);
1251 // Returns a chain and a flag for retval copy to use.
1252 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1253 &Ops[0], Ops.size());
1254 InFlag = Chain.getValue(1);
1256 Chain = DAG.getCALLSEQ_END(Chain,
1257 DAG.getConstant(NumStackBytes, PtrVT),
1258 DAG.getConstant(0, PtrVT),
1260 if (TheCall->getValueType(0) != MVT::Other)
1261 InFlag = Chain.getValue(1);
1263 SDValue ResultVals[3];
1264 unsigned NumResults = 0;
1266 // If the call has results, copy the values out of the ret val registers.
1267 switch (TheCall->getValueType(0).getSimpleVT()) {
1268 default: assert(0 && "Unexpected ret value!");
1269 case MVT::Other: break;
1271 if (TheCall->getValueType(1) == MVT::i32) {
1272 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1273 ResultVals[0] = Chain.getValue(0);
1274 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1275 Chain.getValue(2)).getValue(1);
1276 ResultVals[1] = Chain.getValue(0);
1279 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1280 ResultVals[0] = Chain.getValue(0);
1285 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1286 ResultVals[0] = Chain.getValue(0);
1291 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1292 InFlag).getValue(1);
1293 ResultVals[0] = Chain.getValue(0);
1301 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
1302 InFlag).getValue(1);
1303 ResultVals[0] = Chain.getValue(0);
1308 // If the function returns void, just return the chain.
1309 if (NumResults == 0)
1312 // Otherwise, merge everything together with a MERGE_VALUES node.
1313 ResultVals[NumResults++] = Chain;
1314 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1315 return Res.getValue(Op.getResNo());
1319 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1320 SmallVector<CCValAssign, 16> RVLocs;
1321 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1322 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1323 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1324 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1326 // If this is the first return lowered for this function, add the regs to the
1327 // liveout set for the function.
1328 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1329 for (unsigned i = 0; i != RVLocs.size(); ++i)
1330 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1333 SDValue Chain = Op.getOperand(0);
1336 // Copy the result values into the output registers.
1337 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1338 CCValAssign &VA = RVLocs[i];
1339 assert(VA.isRegLoc() && "Can only return in registers!");
1340 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1341 Flag = Chain.getValue(1);
1345 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1347 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1351 //===----------------------------------------------------------------------===//
1352 // Vector related lowering:
1353 //===----------------------------------------------------------------------===//
1355 static ConstantSDNode *
1356 getVecImm(SDNode *N) {
1357 SDValue OpVal(0, 0);
1359 // Check to see if this buildvec has a single non-undef value in its elements.
1360 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1361 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1362 if (OpVal.getNode() == 0)
1363 OpVal = N->getOperand(i);
1364 else if (OpVal != N->getOperand(i))
1368 if (OpVal.getNode() != 0) {
1369 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1374 return 0; // All UNDEF: use implicit def.; not Constant node
1377 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1378 /// and the value fits into an unsigned 18-bit constant, and if so, return the constant.
1380 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1382 if (ConstantSDNode *CN = getVecImm(N)) {
1383 uint64_t Value = CN->getZExtValue();
1384 if (ValueType == MVT::i64) {
1385 uint64_t UValue = CN->getZExtValue();
1386 uint32_t upper = uint32_t(UValue >> 32);
1387 uint32_t lower = uint32_t(UValue);
1390 Value = Value >> 32;
1392 if (Value <= 0x3ffff)
1393 return DAG.getConstant(Value, ValueType);
1399 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1400 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1402 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1404 if (ConstantSDNode *CN = getVecImm(N)) {
1405 int64_t Value = CN->getSignExtended();
1406 if (ValueType == MVT::i64) {
1407 uint64_t UValue = CN->getZExtValue();
1408 uint32_t upper = uint32_t(UValue >> 32);
1409 uint32_t lower = uint32_t(UValue);
1412 Value = Value >> 32;
1414 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1415 return DAG.getConstant(Value, ValueType);
1422 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1423 /// and the value fits into a signed 10-bit constant, and if so, return the constant.
1425 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1427 if (ConstantSDNode *CN = getVecImm(N)) {
1428 int64_t Value = CN->getSignExtended();
1429 if (ValueType == MVT::i64) {
1430 uint64_t UValue = CN->getZExtValue();
1431 uint32_t upper = uint32_t(UValue >> 32);
1432 uint32_t lower = uint32_t(UValue);
1435 Value = Value >> 32;
1437 if (isS10Constant(Value))
1438 return DAG.getConstant(Value, ValueType);
1444 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1445 /// and the value fits into a signed 8-bit constant, and if so, return the constant.
1448 /// @note: The incoming vector is v16i8 because that's the only way we can load
1449 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same value.
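/// For example (illustrative values), a splat of the 16-bit pattern 0x2a2a
/// passes the test and is emitted as the 8-bit immediate 0x2a, whereas a
/// splat of 0x12ff falls through and returns an empty SDValue.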
1451 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1453 if (ConstantSDNode *CN = getVecImm(N)) {
1454 int Value = (int) CN->getZExtValue();
1455 if (ValueType == MVT::i16
1456 && Value <= 0xffff /* truncated from uint64_t */
1457 && ((short) Value >> 8) == ((short) Value & 0xff))
1458 return DAG.getConstant(Value & 0xff, ValueType);
1459 else if (ValueType == MVT::i8
1460 && (Value & 0xff) == Value)
1461 return DAG.getConstant(Value, ValueType);
1467 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1468 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1470 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1472 if (ConstantSDNode *CN = getVecImm(N)) {
1473 uint64_t Value = CN->getZExtValue();
1474 if ((ValueType == MVT::i32
1475 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1476 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1477 return DAG.getConstant(Value >> 16, ValueType);
1483 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1484 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1485 if (ConstantSDNode *CN = getVecImm(N)) {
1486 return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
1492 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1493 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1494 if (ConstantSDNode *CN = getVecImm(N)) {
1495 return DAG.getConstant(CN->getZExtValue(), MVT::i64);
1501 // If this is a vector of constants or undefs, get the bits. A bit in
1502 // UndefBits is set if the corresponding element of the vector is an
1503 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1504 // zero. Return true if this is not an array of constants, false if it is.
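// For example, a v4i32 BUILD_VECTOR of (7, 7, 7, 7) packs into
// VectorBits[0] = VectorBits[1] = 0x0000000700000007 with both UndefBits
// words zero, and the function returns false (it is an array of constants).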
1506 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1507 uint64_t UndefBits[2]) {
1508 // Start with zero'd results.
1509 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1511 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1512 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1513 SDValue OpVal = BV->getOperand(i);
1515 unsigned PartNo = i >= e/2; // In the upper 64 bits?
1516 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1518 uint64_t EltBits = 0;
1519 if (OpVal.getOpcode() == ISD::UNDEF) {
1520 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1521 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1523 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1524 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
1525 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1526 const APFloat &apf = CN->getValueAPF();
1527 EltBits = (CN->getValueType(0) == MVT::f32
1528 ? FloatToBits(apf.convertToFloat())
1529 : DoubleToBits(apf.convertToDouble()));
1531 // Nonconstant element.
1535 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1538 //printf("%llx %llx %llx %llx\n",
1539 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1543 /// If this is a splat (repetition) of a value across the whole vector, return
1544 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1545 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1546 /// SplatSize = 1 byte.
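/// For instance, Bits128 = { 0x0101010101010101, 0x0101010101010101 } with no
/// undefs collapses to SplatBits = 0x01 and SplatSize = 1 when MinSplatBits is
/// 8 or less; with MinSplatBits = 16 the walk stops one level earlier and
/// reports SplatBits = 0x0101, SplatSize = 2.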
1547 static bool isConstantSplat(const uint64_t Bits128[2],
1548 const uint64_t Undef128[2],
1550 uint64_t &SplatBits, uint64_t &SplatUndef,
1552 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1553 // the same as the lower 64-bits, ignoring undefs.
1554 uint64_t Bits64 = Bits128[0] | Bits128[1];
1555 uint64_t Undef64 = Undef128[0] & Undef128[1];
1556 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1557 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1558 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1559 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1561 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1562 if (MinSplatBits < 64) {
1564 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1566 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1567 if (MinSplatBits < 32) {
1569 // If the top 16-bits are different than the lower 16-bits, ignoring
1570 // undefs, we have an i32 splat.
1571 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1572 if (MinSplatBits < 16) {
1573 // If the top 8-bits are different than the lower 8-bits, ignoring
1574 // undefs, we have an i16 splat.
1575 if ((Bits16 & (uint16_t(~Undef16) >> 8))
1576 == ((Bits16 >> 8) & ~Undef16)) {
1577 // Otherwise, we have an 8-bit splat.
1578 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1579 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1585 SplatUndef = Undef16;
1592 SplatUndef = Undef32;
1598 SplatBits = Bits128[0];
1599 SplatUndef = Undef128[0];
1605 return false; // Can't be a splat if two pieces don't match.
1608 // If this is a case we can't handle, return null and let the default
1609 // expansion code take care of it. If we CAN select this case, and if it
1610 // selects to a single instruction, return Op. Otherwise, if we can codegen
1611 // this case more efficiently than a constant pool load, lower it to the
1612 // sequence of ops that should be used.
1613 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1614 MVT VT = Op.getValueType();
1615 // If this is a vector of constants or undefs, get the bits. A bit in
1616 // UndefBits is set if the corresponding element of the vector is an
1617 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are zero.
1619 uint64_t VectorBits[2];
1620 uint64_t UndefBits[2];
1621 uint64_t SplatBits, SplatUndef;
1623 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1624 || !isConstantSplat(VectorBits, UndefBits,
1625 VT.getVectorElementType().getSizeInBits(),
1626 SplatBits, SplatUndef, SplatSize))
1627 return SDValue(); // Not a constant vector, not a splat.
1629 switch (VT.getSimpleVT()) {
1632 uint32_t Value32 = SplatBits;
1633 assert(SplatSize == 4
1634 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1635 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1636 SDValue T = DAG.getConstant(Value32, MVT::i32);
1637 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1638 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1642 uint64_t f64val = SplatBits;
1643 assert(SplatSize == 8
1644 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1645 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1646 SDValue T = DAG.getConstant(f64val, MVT::i64);
1647 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1648 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1652 // 8-bit constants have to be expanded to 16-bits
1653 unsigned short Value16 = SplatBits | (SplatBits << 8);
1655 for (int i = 0; i < 8; ++i)
1656 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1657 return DAG.getNode(ISD::BIT_CONVERT, VT,
1658 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1661 unsigned short Value16;
1663 Value16 = (unsigned short) (SplatBits & 0xffff);
1665 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1666 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1668 for (int i = 0; i < 8; ++i) Ops[i] = T;
1669 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1672 unsigned int Value = SplatBits;
1673 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1674 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1677 uint64_t val = SplatBits;
1678 uint32_t upper = uint32_t(val >> 32);
1679 uint32_t lower = uint32_t(val);
1681 if (upper == lower) {
1682 // Magic constant that can be matched by IL, ILA, et al.
1683 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1684 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1688 SmallVector<SDValue, 16> ShufBytes;
1690 bool upper_special, lower_special;
1692 // NOTE: This code creates common-case shuffle masks that can be easily
1693 // detected as common expressions. It is not attempting to create highly
1694 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1696 // Detect if the upper or lower half is a special shuffle mask pattern:
1697 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1698 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1700 // Create lower vector if not a special pattern
1701 if (!lower_special) {
1702 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1703 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1704 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1705 LO32C, LO32C, LO32C, LO32C));
1708 // Create upper vector if not a special pattern
1709 if (!upper_special) {
1710 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1711 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1712 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1713 HI32C, HI32C, HI32C, HI32C));
1716 // If either upper or lower are special, then the two input operands are
1717 // the same (basically, one of them is a "don't care")
1722 if (lower_special && upper_special) {
1723 // Unhappy situation... both upper and lower are special, so punt with
1724 // a target constant:
1725 SDValue Zero = DAG.getConstant(0, MVT::i32);
1726 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1730 for (int i = 0; i < 4; ++i) {
1732 for (int j = 0; j < 4; ++j) {
1734 bool process_upper, process_lower;
1736 process_upper = (upper_special && (i & 1) == 0);
1737 process_lower = (lower_special && (i & 1) == 1);
1739 if (process_upper || process_lower) {
1740 if ((process_upper && upper == 0)
1741 || (process_lower && lower == 0))
1743 else if ((process_upper && upper == 0xffffffff)
1744 || (process_lower && lower == 0xffffffff))
1746 else if ((process_upper && upper == 0x80000000)
1747 || (process_lower && lower == 0x80000000))
1748 val |= (j == 0 ? 0xe0 : 0x80);
1750 val |= i * 4 + j + ((i & 1) * 16);
1753 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1756 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1757 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1758 &ShufBytes[0], ShufBytes.size()));
1766 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1767 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1768 /// permutation vector is monotonically increasing with one "exception"
1769 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1770 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1771 /// In either case, the net result is going to eventually invoke SHUFB to
1772 /// permute/shuffle the bytes from V1 and V2.
1774 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1775 /// generate a control word for byte/halfword/word insertion. This takes care
1776 /// of a single element move from V2 into V1.
1778 /// SPUISD::SHUFB is eventually selected as the Cell's <i>shufb</i> instruction.
1779 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1780 SDValue V1 = Op.getOperand(0);
1781 SDValue V2 = Op.getOperand(1);
1782 SDValue PermMask = Op.getOperand(2);
1784 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1786 // If we have a single element being moved from V1 to V2, this can be handled
1787 // using the C*[DX] compute mask instructions, but the vector elements have
1788 // to be monotonically increasing with one exception element.
1789 MVT EltVT = V1.getValueType().getVectorElementType();
1790 unsigned EltsFromV2 = 0;
1792 unsigned V2EltIdx0 = 0;
1793 unsigned CurrElt = 0;
1794 bool monotonic = true;
1795 if (EltVT == MVT::i8)
V2EltIdx0 = 16;
1797 else if (EltVT == MVT::i16)
V2EltIdx0 = 8;
1799 else if (EltVT == MVT::i32)
V2EltIdx0 = 4;
else
1802 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1804 for (unsigned i = 0, e = PermMask.getNumOperands();
1805 EltsFromV2 <= 1 && monotonic && i != e;
1808 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1811 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1813 if (SrcElt >= V2EltIdx0) {
1815 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1816 } else if (CurrElt != SrcElt) {
1823 if (EltsFromV2 == 1 && monotonic) {
1824 // Compute mask and shuffle
1825 MachineFunction &MF = DAG.getMachineFunction();
1826 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1827 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1828 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1829 // Initialize temporary register to 0
1830 SDValue InitTempReg =
1831 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1832 // Copy register's contents as index in INSERT_MASK:
1833 SDValue ShufMaskOp =
1834 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1835 DAG.getTargetConstant(V2Elt, MVT::i32),
1836 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1837 // Use shuffle mask in SHUFB synthetic instruction:
1838 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1840 // Convert the SHUFFLE_VECTOR mask's input element units to byte units.
1842 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
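// For example, a v4i32 shuffle mask <4, 1, 2, 3> (BytesPerElement == 4)
// expands to the byte-level mask 16..19, 4..7, 8..11, 12..15 that shufb
// expects.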
1844 SmallVector<SDValue, 16> ResultMask;
1845 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1847 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1850 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1852 for (unsigned j = 0; j < BytesPerElement; ++j) {
1853 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1858 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1859 &ResultMask[0], ResultMask.size());
1860 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1864 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1865 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1867 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1868 // For a constant, build the appropriate constant vector, which will
1869 // eventually simplify to a vector register load.
1871 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1872 SmallVector<SDValue, 16> ConstVecValues;
size_t n_copies;
MVT VT;
1876 // Create a constant vector:
1877 switch (Op.getValueType().getSimpleVT()) {
1878 default: assert(0 && "Unexpected constant value type in "
1879 "LowerSCALAR_TO_VECTOR");
1880 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1881 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1882 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1883 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1884 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1885 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1888 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1889 for (size_t j = 0; j < n_copies; ++j)
1890 ConstVecValues.push_back(CValue);
1892 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1893 &ConstVecValues[0], ConstVecValues.size());
1895 // Otherwise, copy the value from one register to another:
1896 switch (Op0.getValueType().getSimpleVT()) {
1897 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1904 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1911 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1912 switch (Op.getValueType().getSimpleVT()) {
1914 cerr << "CellSPU: Unknown vector multiplication, got "
1915 << Op.getValueType().getMVTString()
1921 SDValue rA = Op.getOperand(0);
1922 SDValue rB = Op.getOperand(1);
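// Writing a = ah*2^16 + al and b = bh*2^16 + bl, the low 32 bits of a*b are
// al*bl + ((ah*bl + al*bh) << 16). MPYU supplies al*bl; the two MPYH nodes
// supply the shifted cross terms.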
1923 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1924 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1925 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1926 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1928 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1932 // Multiply two v8i16 vectors (pipeline friendly version):
1933 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1934 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1935 // c) Use SELB to select upper and lower halves from the intermediate results
1937 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1938 // dual-issue. This code does manage to do this, even if it's a little on the convoluted side.
1941 MachineFunction &MF = DAG.getMachineFunction();
1942 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1943 SDValue Chain = Op.getOperand(0);
1944 SDValue rA = Op.getOperand(0);
1945 SDValue rB = Op.getOperand(1);
1946 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1947 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1950 DAG.getCopyToReg(Chain, FSMBIreg,
1951 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1952 DAG.getConstant(0xcccc, MVT::i16)));
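// SELECT_MASK (fsmb) expands the immediate 0xcccc bit-per-byte, so the two
// upper bytes of every 32-bit word become 0xFF. SELB can then take those
// bytes from the (left-shifted) high-halfword products and the remaining
// bytes from the low-halfword products.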
1955 DAG.getCopyToReg(FSMBOp, HiProdReg,
1956 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1958 SDValue HHProd_v4i32 =
1959 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1960 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1962 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1963 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1964 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1965 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1967 DAG.getConstant(16, MVT::i16))),
1968 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1971 // This M00sE is N@stI! (apologies to Monty Python)
1973 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1974 // is to break it all apart, sign extend, and reassemble the various
1975 // intermediate products.
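// Roughly: the two bytes of each halfword lane are isolated (by shifts and
// sign extensions), multiplied with the 16-bit multiplier, and the partial
// products are then recombined into the v16i8 result with SELECT_MASK/SELB
// and a final OR.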
1977 SDValue rA = Op.getOperand(0);
1978 SDValue rB = Op.getOperand(1);
1979 SDValue c8 = DAG.getConstant(8, MVT::i32);
1980 SDValue c16 = DAG.getConstant(16, MVT::i32);
1983 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1985 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1987 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1989 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1992 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1993 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1995 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1996 DAG.getConstant(0x2222, MVT::i16));
1998 SDValue LoProdParts =
1999 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2000 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2001 LLProd, LHProd, FSMBmask));
2003 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
2006 DAG.getNode(ISD::AND, MVT::v4i32,
2008 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2009 LoProdMask, LoProdMask,
2010 LoProdMask, LoProdMask));
2013 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2014 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2017 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2018 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2021 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2022 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2023 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2026 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2027 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2028 DAG.getNode(SPUISD::VEC_SRA,
2029 MVT::v4i32, rAH, c8)),
2030 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2031 DAG.getNode(SPUISD::VEC_SRA,
2032 MVT::v4i32, rBH, c8)));
2035 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2037 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2041 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2043 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2044 DAG.getNode(ISD::OR, MVT::v4i32,
2052 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2053 MachineFunction &MF = DAG.getMachineFunction();
2054 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2056 SDValue A = Op.getOperand(0);
2057 SDValue B = Op.getOperand(1);
2058 MVT VT = Op.getValueType();
2060 unsigned VRegBR, VRegC;
2062 if (VT == MVT::f32) {
2063 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2064 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2066 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2067 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2069 // TODO: make sure we're feeding FPInterp the right arguments
2070 // Right now: fi B, frest(B)
2073 // (Floating Interpolate (FP Reciprocal Estimate B))
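// The sequence below is the usual reciprocal-estimate refinement: with
// y ~= 1/B obtained from frest/fi, it computes A*y + y*(A - B*(A*y)),
// which cancels most of the estimate's error in A/B.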
2075 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2076 DAG.getNode(SPUISD::FPInterp, VT, B,
2077 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2079 // Computes A * BRcpl and stores in a temporary register
2081 DAG.getCopyToReg(BRcpl, VRegC,
2082 DAG.getNode(ISD::FMUL, VT, A,
2083 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2084 // What does the Chain variable do? It's magic!
2085 // TODO: set Chain = Op(0).getEntryNode()
2087 return DAG.getNode(ISD::FADD, VT,
2088 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2089 DAG.getNode(ISD::FMUL, VT,
2090 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2091 DAG.getNode(ISD::FSUB, VT, A,
2092 DAG.getNode(ISD::FMUL, VT, B,
2093 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2096 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2097 MVT VT = Op.getValueType();
2098 SDValue N = Op.getOperand(0);
2099 SDValue Elt = Op.getOperand(1);
2100 SDValue ShufMask[16];
2101 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2103 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2105 int EltNo = (int) C->getZExtValue();
2108 if (VT == MVT::i8 && EltNo >= 16)
2109 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2110 else if (VT == MVT::i16 && EltNo >= 8)
2111 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2112 else if (VT == MVT::i32 && EltNo >= 4)
2113 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2114 else if (VT == MVT::i64 && EltNo >= 2)
2115 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2117 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2118 // i32 and i64: Element 0 is the preferred slot
2119 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2122 // Need to generate shuffle mask and extract:
2123 int prefslot_begin = -1, prefslot_end = -1;
2124 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2126 switch (VT.getSimpleVT()) {
2128 assert(false && "Invalid value type!");
2130 prefslot_begin = prefslot_end = 3;
2134 prefslot_begin = 2; prefslot_end = 3;
2138 prefslot_begin = 0; prefslot_end = 3;
2142 prefslot_begin = 0; prefslot_end = 7;
2147 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2148 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2150 for (int i = 0; i < 16; ++i) {
2151 // Zero-fill the upper part of the preferred slot; don't care about the rest.
2153 unsigned int mask_val;
2155 if (i <= prefslot_end) {
2157 ((i < prefslot_begin)
2159 : elt_byte + (i - prefslot_begin));
2161 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2163 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2166 SDValue ShufMaskVec =
2167 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2169 sizeof(ShufMask) / sizeof(ShufMask[0]));
2171 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2172 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2173 N, N, ShufMaskVec));
2177 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2178 SDValue VecOp = Op.getOperand(0);
2179 SDValue ValOp = Op.getOperand(1);
2180 SDValue IdxOp = Op.getOperand(2);
2181 MVT VT = Op.getValueType();
2183 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2184 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2186 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2187 // Use $2 because it's always 16-byte aligned and it's available:
2188 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
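// The insert is done shufb-style: INSERT_MASK (the cwd/chd/cbd family)
// yields an insertion control word for the target slot, addressed relative
// to $2, and SHUFB then merges the scalar (as a vector) into VecOp under
// that control word.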
2191 DAG.getNode(SPUISD::SHUFB, VT,
2192 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2194 DAG.getNode(SPUISD::INSERT_MASK, VT,
2195 DAG.getNode(ISD::ADD, PtrVT,
2197 DAG.getConstant(CN->getZExtValue(),
2203 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2205 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2207 assert(Op.getValueType() == MVT::i8);
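// Common strategy for the cases below: widen the i8 operands to i16 (the
// narrowest width the SPU handles natively for these operations), perform
// the operation at i16, then truncate the result back to i8.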
2210 assert(0 && "Unhandled i8 math operator");
2214 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2216 SDValue N1 = Op.getOperand(1);
2217 N0 = (N0.getOpcode() != ISD::Constant
2218 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2219 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2221 N1 = (N1.getOpcode() != ISD::Constant
2222 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2223 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2225 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2226 DAG.getNode(Opc, MVT::i16, N0, N1));
2230 SDValue N1 = Op.getOperand(1);
2232 N0 = (N0.getOpcode() != ISD::Constant
2233 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2234 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2236 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2239 N1 = (N1.getOpcode() != ISD::Constant
2240 ? DAG.getNode(N1Opc, MVT::i16, N1)
2241 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2244 DAG.getNode(ISD::OR, MVT::i16, N0,
2245 DAG.getNode(ISD::SHL, MVT::i16,
2246 N0, DAG.getConstant(8, MVT::i16)));
2247 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2248 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2252 SDValue N1 = Op.getOperand(1);
2254 N0 = (N0.getOpcode() != ISD::Constant
2255 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2256 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2258 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2261 N1 = (N1.getOpcode() != ISD::Constant
2262 ? DAG.getNode(N1Opc, MVT::i16, N1)
2263 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2265 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2266 DAG.getNode(Opc, MVT::i16, N0, N1));
2269 SDValue N1 = Op.getOperand(1);
2271 N0 = (N0.getOpcode() != ISD::Constant
2272 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2273 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2275 N1Opc = N1.getValueType().bitsLT(MVT::i16)
2278 N1 = (N1.getOpcode() != ISD::Constant
2279 ? DAG.getNode(N1Opc, MVT::i16, N1)
2280 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2282 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2283 DAG.getNode(Opc, MVT::i16, N0, N1));
2286 SDValue N1 = Op.getOperand(1);
2288 N0 = (N0.getOpcode() != ISD::Constant
2289 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2290 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
2292 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2293 N1 = (N1.getOpcode() != ISD::Constant
2294 ? DAG.getNode(N1Opc, MVT::i16, N1)
2295 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
2297 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2298 DAG.getNode(Opc, MVT::i16, N0, N1));
2306 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2308 MVT VT = Op.getValueType();
2309 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2311 SDValue Op0 = Op.getOperand(0);
2314 case ISD::ZERO_EXTEND:
2315 case ISD::SIGN_EXTEND:
2316 case ISD::ANY_EXTEND: {
2317 MVT Op0VT = Op0.getValueType();
2318 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2320 assert(Op0VT == MVT::i32
2321 && "CellSPU: Zero/sign extending something other than i32");
2322 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2324 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2325 ? SPUISD::ROTBYTES_RIGHT_S
2326 : SPUISD::ROTQUAD_RZ_BYTES);
2327 SDValue PromoteScalar =
2328 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2330 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2331 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2332 DAG.getNode(NewOpc, Op0VecVT,
2334 DAG.getConstant(4, MVT::i32))));
}
case ISD::ADD: {
2338 // Turn operands into vectors to satisfy type checking (shufb works on
// vectors):
SDValue Op0 =
2341 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
2343 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2344 SmallVector<SDValue, 16> ShufBytes;
2346 // Create the shuffle mask for "rotating" the carry up one register slot
2347 // once the carry is generated.
2348 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2349 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2350 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2351 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
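// In effect, the shuffle copies each doubleword's low-word carry (bytes
// 4..7 and 12..15 of the CARRY_GENERATE result) into the corresponding
// high-word slot and zero-fills the rest (0x80 shufb codes), so that
// ADD_EXTENDED adds the carry into the upper 32 bits.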
2354 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2355 SDValue ShiftedCarry =
2356 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2358 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2359 &ShufBytes[0], ShufBytes.size()));
2361 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2362 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2363 Op0, Op1, ShiftedCarry));
}
case ISD::SUB: {
2367 // Turn operands into vectors to satisfy type checking (shufb works on
// vectors):
SDValue Op0 =
2370 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
2372 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2373 SmallVector<SDValue, 16> ShufBytes;
2375 // Create the shuffle mask for "rotating" the borrow up one register slot
2376 // once the borrow is generated.
2377 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2378 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2379 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2380 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2383 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2384 SDValue ShiftedBorrow =
2385 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2386 BorrowGen, BorrowGen,
2387 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2388 &ShufBytes[0], ShufBytes.size()));
2390 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2391 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2392 Op0, Op1, ShiftedBorrow));
}
case ISD::SHL: {
2396 SDValue ShiftAmt = Op.getOperand(1);
2397 MVT ShiftAmtVT = ShiftAmt.getValueType();
2398 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2400 DAG.getNode(SPUISD::SELB, VecVT,
2402 DAG.getConstant(0, VecVT),
2403 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2404 DAG.getConstant(0xff00ULL, MVT::i16)));
2405 SDValue ShiftAmtBytes =
2406 DAG.getNode(ISD::SRL, ShiftAmtVT,
2408 DAG.getConstant(3, ShiftAmtVT));
2409 SDValue ShiftAmtBits =
2410 DAG.getNode(ISD::AND, ShiftAmtVT,
2412 DAG.getConstant(7, ShiftAmtVT));
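// The 64-bit shift is decomposed into a quadword shift by whole bytes
// (amount >> 3) followed by a residual bit shift (amount & 7); e.g. a shift
// by 35 becomes "shift 4 bytes, then 3 bits".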
2414 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2415 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2416 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2417 MaskLower, ShiftAmtBytes),
2422 MVT VT = Op.getValueType();
2423 SDValue ShiftAmt = Op.getOperand(1);
2424 MVT ShiftAmtVT = ShiftAmt.getValueType();
2425 SDValue ShiftAmtBytes =
2426 DAG.getNode(ISD::SRL, ShiftAmtVT,
2428 DAG.getConstant(3, ShiftAmtVT));
2429 SDValue ShiftAmtBits =
2430 DAG.getNode(ISD::AND, ShiftAmtVT,
2432 DAG.getConstant(7, ShiftAmtVT));
2434 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2435 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2436 Op0, ShiftAmtBytes),
2441 // Promote Op0 to vector
2443 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2444 SDValue ShiftAmt = Op.getOperand(1);
2445 MVT ShiftVT = ShiftAmt.getValueType();
2447 // Negate variable shift amounts
2448 if (!isa<ConstantSDNode>(ShiftAmt)) {
2449 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2450 DAG.getConstant(0, ShiftVT), ShiftAmt);
2453 SDValue UpperHalfSign =
2454 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2455 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2456 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2457 Op0, DAG.getConstant(31, MVT::i32))));
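// Strategy: broadcast the sign of the upper word (VEC_SRA by 31), splice
// those sign bytes into the quadword next to the i64 value with SELB, then
// rotate the whole quadword by the negated shift amount so the sign bytes
// wrap into the top of the value, which amounts to an arithmetic right
// shift of the preferred slot.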
2458 SDValue UpperHalfSignMask =
2459 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2460 SDValue UpperLowerMask =
2461 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2462 DAG.getConstant(0xff00, MVT::i16));
2463 SDValue UpperLowerSelect =
2464 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2465 UpperHalfSignMask, Op0, UpperLowerMask);
2466 SDValue RotateLeftBytes =
2467 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2468 UpperLowerSelect, ShiftAmt);
2469 SDValue RotateLeftBits =
2470 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2471 RotateLeftBytes, ShiftAmt);
2473 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2481 //! Lower byte immediate operations for v16i8 vectors:
static SDValue
2483 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
SDValue ConstVec;
SDValue Arg;
2486 MVT VT = Op.getValueType();
2488 ConstVec = Op.getOperand(0);
2489 Arg = Op.getOperand(1);
2490 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2491 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2492 ConstVec = ConstVec.getOperand(0);
2494 ConstVec = Op.getOperand(1);
2495 Arg = Op.getOperand(0);
2496 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2497 ConstVec = ConstVec.getOperand(0);
2502 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2503 uint64_t VectorBits[2];
2504 uint64_t UndefBits[2];
2505 uint64_t SplatBits, SplatUndef;
2508 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2509 && isConstantSplat(VectorBits, UndefBits,
2510 VT.getVectorElementType().getSizeInBits(),
2511 SplatBits, SplatUndef, SplatSize)) {
SDValue tcVec[16];
2513 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2514 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2516 // Turn the BUILD_VECTOR into a set of target constants:
2517 for (size_t i = 0; i < tcVecSize; ++i)
tcVec[i] = tc;
2520 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2521 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2524 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2525 // lowered. Return the operation, rather than a null SDValue.
2529 //! Lower i32 multiplication
2530 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
unsigned Opc) {
2532 switch (VT.getSimpleVT()) {
2534 cerr << "CellSPU: Unknown LowerMUL value type, got "
2535 << Op.getValueType().getMVTString()
2541 SDValue rA = Op.getOperand(0);
2542 SDValue rB = Op.getOperand(1);
2544 return DAG.getNode(ISD::ADD, MVT::i32,
2545 DAG.getNode(ISD::ADD, MVT::i32,
2546 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2547 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2548 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2555 //! Custom lowering for CTPOP (count population)
2557 Custom lowering code that counts the number of ones in the input
2558 operand. SPU has such an instruction, but it counts the number of
2559 ones per byte, which then have to be accumulated.
2561 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2562 MVT VT = Op.getValueType();
2563 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2565 switch (VT.getSimpleVT()) {
2567 assert(false && "Invalid value type!");
2569 SDValue N = Op.getOperand(0);
2570 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2572 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2573 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2575 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2579 MachineFunction &MF = DAG.getMachineFunction();
2580 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2582 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2584 SDValue N = Op.getOperand(0);
2585 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2586 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2587 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2589 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2590 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2592 // CNTB_result becomes the chain to which the virtual register
2593 // CNTB_reg becomes associated:
2594 SDValue CNTB_result =
2595 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2597 SDValue CNTB_rescopy =
2598 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2600 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2602 return DAG.getNode(ISD::AND, MVT::i16,
2603 DAG.getNode(ISD::ADD, MVT::i16,
2604 DAG.getNode(ISD::SRL, MVT::i16,
2611 MachineFunction &MF = DAG.getMachineFunction();
2612 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2614 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2615 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2617 SDValue N = Op.getOperand(0);
2618 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2619 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2620 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2621 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
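// CNTB leaves four per-byte counts packed in the word; two shift-and-add
// steps ((x >> 16) + x, then (sum >> 8) + sum) fold them together, and the
// final AND with 0xff keeps just the total (at most 32).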
2623 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2624 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2626 // CNTB_result becomes the chain to which all of the virtual registers
2627 // CNTB_reg, SUM1_reg become associated:
2628 SDValue CNTB_result =
2629 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2631 SDValue CNTB_rescopy =
2632 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2635 DAG.getNode(ISD::SRL, MVT::i32,
2636 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2639 DAG.getNode(ISD::ADD, MVT::i32,
2640 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2642 SDValue Sum1_rescopy =
2643 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2646 DAG.getNode(ISD::SRL, MVT::i32,
2647 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2650 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2651 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2653 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2663 /// LowerOperation - Provide custom lowering hooks for some operations.
2666 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2668 unsigned Opc = (unsigned) Op.getOpcode();
2669 MVT VT = Op.getValueType();
2673 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2674 cerr << "Op.getOpcode() = " << Opc << "\n";
2675 cerr << "*Op.getNode():\n";
2676 Op.getNode()->dump();
abort();
case ISD::LOAD:
2682 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::STORE:
2684 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2685 case ISD::ConstantPool:
2686 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2687 case ISD::GlobalAddress:
2688 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2689 case ISD::JumpTable:
2690 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::Constant:
2692 return LowerConstant(Op, DAG);
2693 case ISD::ConstantFP:
2694 return LowerConstantFP(Op, DAG);
case ISD::BRCOND:
2696 return LowerBRCOND(Op, DAG);
2697 case ISD::FORMAL_ARGUMENTS:
2698 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
case ISD::CALL:
2700 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
case ISD::RET:
2702 return LowerRET(Op, DAG, getTargetMachine());
2705 // i8, i64 math ops:
2706 case ISD::ZERO_EXTEND:
2707 case ISD::SIGN_EXTEND:
2708 case ISD::ANY_EXTEND:
2717 return LowerI8Math(Op, DAG, Opc);
2718 else if (VT == MVT::i64)
2719 return LowerI64Math(Op, DAG, Opc);
2723 // Vector-related lowering.
2724 case ISD::BUILD_VECTOR:
2725 return LowerBUILD_VECTOR(Op, DAG);
2726 case ISD::SCALAR_TO_VECTOR:
2727 return LowerSCALAR_TO_VECTOR(Op, DAG);
2728 case ISD::VECTOR_SHUFFLE:
2729 return LowerVECTOR_SHUFFLE(Op, DAG);
2730 case ISD::EXTRACT_VECTOR_ELT:
2731 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2732 case ISD::INSERT_VECTOR_ELT:
2733 return LowerINSERT_VECTOR_ELT(Op, DAG);
2735 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
case ISD::AND:
case ISD::OR:
case ISD::XOR:
2739 return LowerByteImmed(Op, DAG);
2741 // Vector and i8 multiply:
case ISD::MUL:
if (VT.isVector())
2744 return LowerVectorMUL(Op, DAG);
2745 else if (VT == MVT::i8)
2746 return LowerI8Math(Op, DAG, Opc);
2748 return LowerMUL(Op, DAG, VT, Opc);
case ISD::FDIV:
2751 if (VT == MVT::f32 || VT == MVT::v4f32)
2752 return LowerFDIVf32(Op, DAG);
2753 // else if (Op.getValueType() == MVT::f64)
2754 // return LowerFDIVf64(Op, DAG);
2756 assert(0 && "Calling FDIV on unsupported MVT");
case ISD::CTPOP:
2759 return LowerCTPOP(Op, DAG);
2765 //===----------------------------------------------------------------------===//
2766 // Target Optimization Hooks
2767 //===----------------------------------------------------------------------===//
2770 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2773 TargetMachine &TM = getTargetMachine();
2775 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2776 SelectionDAG &DAG = DCI.DAG;
2777 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2778 SDValue Result; // Initially, NULL result
2780 switch (N->getOpcode()) {
2783 SDValue Op1 = N->getOperand(1);
2785 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2786 SDValue Op01 = Op0.getOperand(1);
2787 if (Op01.getOpcode() == ISD::Constant
2788 || Op01.getOpcode() == ISD::TargetConstant) {
2789 // (add <const>, (SPUindirect <arg>, <const>)) ->
2790 // (SPUindirect <arg>, <const + const>)
2791 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2792 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2793 SDValue combinedConst =
2794 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2795 Op0.getValueType());
2797 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2798 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2799 DEBUG(cerr << "With: (SPUindirect <arg>, "
2800 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2801 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2802 Op0.getOperand(0), combinedConst);
2804 } else if (isa<ConstantSDNode>(Op0)
2805 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2806 SDValue Op11 = Op1.getOperand(1);
2807 if (Op11.getOpcode() == ISD::Constant
2808 || Op11.getOpcode() == ISD::TargetConstant) {
2809 // (add (SPUindirect <arg>, <const>), <const>) ->
2810 // (SPUindirect <arg>, <const + const>)
2811 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2812 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2813 SDValue combinedConst =
2814 DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
2815 Op0.getValueType());
2817 DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
2818 << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
2819 DEBUG(cerr << "With: (SPUindirect <arg>, "
2820 << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
2822 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2823 Op1.getOperand(0), combinedConst);
2828 case ISD::SIGN_EXTEND:
2829 case ISD::ZERO_EXTEND:
2830 case ISD::ANY_EXTEND: {
2831 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2832 N->getValueType(0) == Op0.getValueType()) {
2833 // (any_extend (SPUextract_elt0 <arg>)) ->
2834 // (SPUextract_elt0 <arg>)
2835 // Types must match, however...
2836 DEBUG(cerr << "Replace: ");
2837 DEBUG(N->dump(&DAG));
2838 DEBUG(cerr << "\nWith: ");
2839 DEBUG(Op0.getNode()->dump(&DAG));
2840 DEBUG(cerr << "\n");
2846 case SPUISD::IndirectAddr: {
2847 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2848 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2849 if (CN->getZExtValue() == 0) {
2850 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2851 // (SPUaform <addr>, 0)
2853 DEBUG(cerr << "Replace: ");
2854 DEBUG(N->dump(&DAG));
2855 DEBUG(cerr << "\nWith: ");
2856 DEBUG(Op0.getNode()->dump(&DAG));
2857 DEBUG(cerr << "\n");
2864 case SPUISD::SHLQUAD_L_BITS:
2865 case SPUISD::SHLQUAD_L_BYTES:
2866 case SPUISD::VEC_SHL:
2867 case SPUISD::VEC_SRL:
2868 case SPUISD::VEC_SRA:
2869 case SPUISD::ROTQUAD_RZ_BYTES:
2870 case SPUISD::ROTQUAD_RZ_BITS: {
2871 SDValue Op1 = N->getOperand(1);
2873 if (isa<ConstantSDNode>(Op1)) {
2874 // Kill degenerate vector shifts:
2875 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2877 if (CN->getZExtValue() == 0) {
2883 case SPUISD::PROMOTE_SCALAR: {
2884 switch (Op0.getOpcode()) {
2887 case ISD::ANY_EXTEND:
2888 case ISD::ZERO_EXTEND:
2889 case ISD::SIGN_EXTEND: {
2890 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2892 // but only if the SPUpromote_scalar and <arg> types match.
2893 SDValue Op00 = Op0.getOperand(0);
2894 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2895 SDValue Op000 = Op00.getOperand(0);
2896 if (Op000.getValueType() == N->getValueType(0)) {
2902 case SPUISD::EXTRACT_ELT0: {
2903 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2905 Result = Op0.getOperand(0);
2912 // Otherwise, return unchanged.
2914 if (Result.getNode()) {
2915 DEBUG(cerr << "\nReplace.SPU: ");
2916 DEBUG(N->dump(&DAG));
2917 DEBUG(cerr << "\nWith: ");
2918 DEBUG(Result.getNode()->dump(&DAG));
2919 DEBUG(cerr << "\n");
2926 //===----------------------------------------------------------------------===//
2927 // Inline Assembly Support
2928 //===----------------------------------------------------------------------===//
2930 /// getConstraintType - Given a constraint letter, return the type of
2931 /// constraint it is for this target.
2932 SPUTargetLowering::ConstraintType
2933 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2934 if (ConstraintLetter.size() == 1) {
2935 switch (ConstraintLetter[0]) {
2942 return C_RegisterClass;
2945 return TargetLowering::getConstraintType(ConstraintLetter);
2948 std::pair<unsigned, const TargetRegisterClass*>
2949 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2952 if (Constraint.size() == 1) {
2953 // GCC RS6000 Constraint Letters
2954 switch (Constraint[0]) {
2958 return std::make_pair(0U, SPU::R64CRegisterClass);
2959 return std::make_pair(0U, SPU::R32CRegisterClass);
2962 return std::make_pair(0U, SPU::R32FPRegisterClass);
2963 else if (VT == MVT::f64)
2964 return std::make_pair(0U, SPU::R64FPRegisterClass);
2967 return std::make_pair(0U, SPU::GPRCRegisterClass);
2971 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2974 //! Compute used/known bits for a SPU operand
2976 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2980 const SelectionDAG &DAG,
2981 unsigned Depth ) const {
2983 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2986 switch (Op.getOpcode()) {
2988 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2998 case SPUISD::PROMOTE_SCALAR: {
2999 SDValue Op0 = Op.getOperand(0);
3000 MVT Op0VT = Op0.getValueType();
3001 unsigned Op0VTBits = Op0VT.getSizeInBits();
3002 uint64_t InMask = Op0VT.getIntegerVTBitMask();
3003 KnownZero |= APInt(Op0VTBits, ~InMask, false);
3004 KnownOne |= APInt(Op0VTBits, InMask, false);
3008 case SPUISD::LDRESULT:
3009 case SPUISD::EXTRACT_ELT0:
3010 case SPUISD::EXTRACT_ELT0_CHAINED: {
3011 MVT OpVT = Op.getValueType();
3012 unsigned OpVTBits = OpVT.getSizeInBits();
3013 uint64_t InMask = OpVT.getIntegerVTBitMask();
3014 KnownZero |= APInt(OpVTBits, ~InMask, false);
3015 KnownOne |= APInt(OpVTBits, InMask, false);
3020 case EXTRACT_I1_ZEXT:
3021 case EXTRACT_I1_SEXT:
3022 case EXTRACT_I8_ZEXT:
3023 case EXTRACT_I8_SEXT:
3028 case SPUISD::SHLQUAD_L_BITS:
3029 case SPUISD::SHLQUAD_L_BYTES:
3030 case SPUISD::VEC_SHL:
3031 case SPUISD::VEC_SRL:
3032 case SPUISD::VEC_SRA:
3033 case SPUISD::VEC_ROTL:
3034 case SPUISD::VEC_ROTR:
3035 case SPUISD::ROTQUAD_RZ_BYTES:
3036 case SPUISD::ROTQUAD_RZ_BITS:
3037 case SPUISD::ROTBYTES_RIGHT_S:
3038 case SPUISD::ROTBYTES_LEFT:
3039 case SPUISD::ROTBYTES_LEFT_CHAINED:
3040 case SPUISD::SELECT_MASK:
3042 case SPUISD::FPInterp:
3043 case SPUISD::FPRecipEst:
3044 case SPUISD::SEXT32TO64:
3049 // LowerAsmOperandForConstraint
3051 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3052 char ConstraintLetter,
3053 std::vector<SDValue> &Ops,
3054 SelectionDAG &DAG) const {
3055 // Default, for the time being, to the base class handler
3056 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3059 /// isLegalAddressImmediate - Return true if the integer value can be used
3060 /// as the offset of the target addressing mode.
3061 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3062 const Type *Ty) const {
3063 // The SPU's local store is 256K, so a legal address immediate must fit in a signed 18-bit field:
3064 return (V > -(1 << 18) && V < (1 << 18) - 1);
3067 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {