//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;
// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
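
  // (The "preferred slot" is where the SPU ABI keeps a scalar inside a
  // 128-bit register: the first 4-byte word of the quadword, with sub-word
  // values right-justified within that word -- hence byte 3 for i8 and i1,
  // byte 2 for i16, and byte 0 for everything word-sized or larger.)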

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::Symbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetSymbol
            || Opc == SPUISD::AFormAddr);
  }

  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDValue &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8,   MVT::i1, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8,   MVT::i8, Custom);
  setTruncStoreAction(MVT::i16,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i32,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i64,  MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN,   MVT::f64, Expand);
  setOperationAction(ISD::FCOS,   MVT::f64, Expand);
  setOperationAction(ISD::FREM,   MVT::f64, Expand);
  setOperationAction(ISD::FLOG,   MVT::f64, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f64, Expand);
  setOperationAction(ISD::FLOG10, MVT::f64, Expand);
  setOperationAction(ISD::FEXP,   MVT::f64, Expand);
  setOperationAction(ISD::FEXP2,  MVT::f64, Expand);
  setOperationAction(ISD::FSIN,   MVT::f32, Expand);
  setOperationAction(ISD::FCOS,   MVT::f32, Expand);
  setOperationAction(ISD::FREM,   MVT::f32, Expand);
  setOperationAction(ISD::FLOG,   MVT::f32, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f32, Expand);
  setOperationAction(ISD::FLOG10, MVT::f32, Expand);
  setOperationAction(ISD::FEXP,   MVT::f32, Expand);
  setOperationAction(ISD::FEXP2,  MVT::f32, Expand);

  // SPU has no hardware square root; expand FSQRT to a library call.
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported
  //        in the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  setOperationAction(ISD::MUL, MVT::i64, Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
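  // (This is the selb instruction: "selb rt, ra, rb, rc" computes the
  // bitwise result (ra & ~rc) | (rb & rc), so a full select mask in rc
  // picks each result bit from either input.)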
  setOperationAction(ISD::SELECT, MVT::i1,  Promote);
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1,  Promote);
  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL, VT, Custom);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED]
      = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
  MVT VT = Op.getValueType();
  if (VT.isInteger())
    return VT;
  else
    return MVT::i32;
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in] LSN The load/store base node
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(), may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (divisible by 16)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  value's offset within the 16-byte chunk, adjusted for its preferred slot
  \param[in,out] VT Caller initializes this value type to the load or store
  node's loaded or stored value type; may be updated if an i1-extended load or
  store.
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

  Both load and store lowering load a block of data aligned on a 16-byte
  boundary. This is the common aligned load code shared between both.
 */
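// Illustrative example (values assumed, not taken from a real compile): an
// i32 load from base+0x23 produces alignOffs = 0x23, so the 16-byte chunk at
// base+0x20 is loaded, and prefSlotOffs = (0x23 & 0xf) - prefslot_byte(i32)
// = 3 - 0 = 3, the left-rotation in bytes needed to land the word in its
// preferred slot.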
static SDValue
AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
{
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDValue basePtr = LSN->getBasePtr();
  SDValue chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDValue Op1 = basePtr.getNode()->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getZExtValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDValue APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.
 */
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.getNode()->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDValue Ops[8];

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDValue result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT,
                  was16aligned);

    if (result.getNode() == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      Ops[0] = the_chain;
      Ops[1] = result;
      if (was16aligned) {
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT) {
        if (VT != MVT::i1)
          vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
      } else
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
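// In effect, each scalar store becomes a read-modify-write of the containing
// quadword: load the chunk, shuffle the new element into its slot, and store
// the whole chunk back.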
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT vecVT, stVecVT = MVT::v16i8;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
    vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

    SDValue alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.getNode() == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue basePtr = LN->getBasePtr();
    SDValue the_chain = alignLoadVec.getValue(1);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    chunk_offset &= 0xf;

    SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDValue insertEltPtr;
    SDValue insertEltOp;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }

    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDValue
LowerConstant(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());

  if (VT == MVT::i64) {
    SDValue T = DAG.getConstant(CN->getZExtValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  } else {
    cerr << "LowerConstant: unhandled constant type "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

  assert((FP != 0) &&
         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f64) {
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
  }

  return SDValue();
}

//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
LowerBRCOND(SDValue Op, SelectionDAG &DAG)
{
  SDValue Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  MVT CondNVT;

  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
                       Op.getOperand(2));
  } else
    return SDValue();                   // Unchanged
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 8> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;

    switch (ObjectVT.getSimpleVT()) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << ObjectVT.getMVTString()
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v2i64:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                               ArgOffset);
    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing the
    // result of va_next.
    SmallVector<SDValue, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
                            ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||    // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.
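
  // e.g. (illustrative values): Addr = 0x1234 is word-aligned and within the
  // 18-bit signed range, so it yields the word offset 0x1234 >> 2 = 0x48d;
  // Addr = 0x1235 fails the low-bit (word alignment) check and returns null.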
  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();

  bool isVarArg = TheCall->isVarArg();
  bool isTailCall = TheCall->isTailCall();

  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/Symbol node (quite common, every direct
  // call is) turn it into a TargetGlobalAddress/TargetSymbol node so that
  // legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (SymbolSDNode *S = dyn_cast<SymbolSDNode>(Callee))
    Callee = DAG.getSymbol(S->getSymbol(), Callee.getValueType(),
                           S->getLinkage());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumStackBytes, PtrVT),
                             DAG.getConstant(0, PtrVT),
                             InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def; not a Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx  %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // Check that the top 16 bits are the same as the lower 16 bits,
          // ignoring undefs; otherwise there is no splat smaller than 32 bits.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8 bits are the same as the lower 8 bits (ignoring
              // undefs), we have an 8-bit splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  uint64_t SplatBits, SplatUndef;
  int SplatSize;
  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
      || !isConstantSplat(VectorBits, UndefBits,
                          VT.getVectorElementType().getSizeInBits(),
                          SplatBits, SplatUndef, SplatSize))
    return SDValue();   // Not a constant vector, not a splat.

  switch (VT.getSimpleVT()) {
  default: abort();
  case MVT::v4f32: {
    uint32_t Value32 = SplatBits;
    assert(SplatSize == 4
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
  }

  case MVT::v2f64: {
    uint64_t f64val = SplatBits;
    assert(SplatSize == 8
           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  }

  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
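    // (SPU has no byte-granular immediate load; ILH replicates a 16-bit
    // immediate across all halfword slots, so a v16i8 splat is emitted as
    // the equivalent v8i16 splat.)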
1654 unsigned short Value16 = SplatBits | (SplatBits << 8);
1656 for (int i = 0; i < 8; ++i)
1657 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1658 return DAG.getNode(ISD::BIT_CONVERT, VT,
1659 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1662 unsigned short Value16;
1664 Value16 = (unsigned short) (SplatBits & 0xffff);
1666 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1667 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1669 for (int i = 0; i < 8; ++i) Ops[i] = T;
1670 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1673 unsigned int Value = SplatBits;
1674 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1675 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1678 uint64_t val = SplatBits;
1679 uint32_t upper = uint32_t(val >> 32);
1680 uint32_t lower = uint32_t(val);
1682 if (upper == lower) {
1683 // Magic constant that can be matched by IL, ILA, et. al.
1684 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1685 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1689 SmallVector<SDValue, 16> ShufBytes;
1691 bool upper_special, lower_special;
1693 // NOTE: This code creates common-case shuffle masks that can be easily
1694 // detected as common expressions. It is not attempting to create highly
1695 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1697 // Detect if the upper or lower half is a special shuffle mask pattern:
1698 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1699 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1701 // Create lower vector if not a special pattern
1702 if (!lower_special) {
1703 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1704 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1705 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1706 LO32C, LO32C, LO32C, LO32C));
1709 // Create upper vector if not a special pattern
1710 if (!upper_special) {
1711 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1712 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1713 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1714 HI32C, HI32C, HI32C, HI32C));
1717 // If either upper or lower are special, then the two input operands are
1718 // the same (basically, one of them is a "don't care")
1723 if (lower_special && upper_special) {
1724 // Unhappy situation... both upper and lower are special, so punt with
1725 // a target constant:
1726 SDValue Zero = DAG.getConstant(0, MVT::i32);
1727 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
      for (int i = 0; i < 4; ++i) {
        uint64_t val = 0;
        for (int j = 0; j < 4; ++j) {
          bool process_upper, process_lower;
          val <<= 8;
          process_upper = (upper_special && (i & 1) == 0);
          process_lower = (lower_special && (i & 1) == 1);

          if (process_upper || process_lower) {
            if ((process_upper && upper == 0)
                || (process_lower && lower == 0))
              val |= 0x80;
            else if ((process_upper && upper == 0xffffffff)
                     || (process_lower && lower == 0xffffffff))
              val |= 0xc0;
            else if ((process_upper && upper == 0x80000000)
                     || (process_lower && lower == 0x80000000))
              val |= (j == 0 ? 0xe0 : 0x80);
          } else
            val |= i * 4 + j + ((i & 1) * 16);
        }

        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
      }

      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                                     &ShufBytes[0], ShufBytes.size()));
    }
  }
  }

  return SDValue();
}
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
///
/// INSERT_MASK is eventually selected as one of the C*D instructions, which
/// generate a control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT EltVT = V1.getValueType().getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  bool monotonic = true;
  if (EltVT == MVT::i8)
    V2EltIdx0 = 16;
  else if (EltVT == MVT::i16)
    V2EltIdx0 = 8;
  else if (EltVT == MVT::i32)
    V2EltIdx0 = 4;
  else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0, e = PermMask.getNumOperands();
       EltsFromV2 <= 1 && monotonic && i != e;
       ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

    if (SrcElt >= V2EltIdx0) {
      ++EltsFromV2;
      V2Elt = (V2EltIdx0 - SrcElt) << 2;
    } else if (CurrElt != SrcElt) {
      monotonic = false;
    }

    ++CurrElt;
  }
  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in INSERT_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

      for (unsigned j = 0; j < BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
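// Worked example (illustrative only): for a v4i32 shuffle with PermMask
// (4, 5, 6, 7), each element index expands to four byte indices, so the
// SHUFB byte mask becomes <16, 17, ..., 31>, selecting all of V2. In
// general, element index i becomes bytes i*4 .. i*4+3 of the 32-byte
// (V1, V2) concatenation that shufb sees.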
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    MVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}
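// For instance (illustrative only): scalar_to_vector of the i32 constant 1
// into v4i32 becomes BUILD_VECTOR <1, 1, 1, 1>, which the selector can
// materialize with a single immediate load. The non-constant path simply
// re-tags the register via PROMOTE_SCALAR, since on the SPU a scalar already
// lives in the preferred slot of a 128-bit register.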
static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getValueType().getSimpleVT()) {
  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << Op.getValueType().getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::v4i32: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
    SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
    SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
    SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);

    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
  }
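  // Note on the v4i32 case above: the SPU only has 16 x 16 -> 32 bit
  // multiplies, so the 32-bit product is assembled from partial products.
  // With a = ah*2^16 + al and b = bh*2^16 + bl (mod 2^32):
  //   a*b = al*bl + ((ah*bl) << 16) + ((bh*al) << 16)
  // MPYU computes al*bl and MPYH computes (ah*bl) << 16, so the three terms
  // summed above are exactly these partial products.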
  case MVT::v8i16: {
    // Multiply two v8i16 vectors (pipeline friendly version):
    // a) multiply lower halves, mask off upper 16-bit of 32-bit product
    // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
    // c) Use SELB to select upper and lower halves from the intermediate results
    //
    // NOTE: We really want to move the SELECT_MASK to earlier to actually get
    // the dual-issue. This code does manage to do this, even if it's a little
    // on the wacky side.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    SDValue Chain = Op.getOperand(0);
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
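    // Note on the 0xcccc mask: SELECT_MASK lowers to fsmbi, which expands
    // each of the 16 mask bits into one byte of the result (1 -> 0xff).
    // 0xcccc = 1100110011001100b therefore marks bytes 0-1, 4-5, 8-9 and
    // 12-13, i.e. the even halfword of every word, which is where the
    // rotated MPYHH products belong in the final SELB.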
    SDValue FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i16)));

    SDValue HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDValue HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }
  case MVT::v16i8: {
    // This M00sE is N@stI! (apologies to Monty Python)
    //
    // SPU doesn't know how to do any 8-bit multiplication, so the solution
    // is to break it all apart, sign extend, and reassemble the various
    // intermediate products.
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue c8 = DAG.getConstant(8, MVT::i32);
    SDValue c16 = DAG.getConstant(16, MVT::i32);

    SDValue LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    SDValue LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0x2222, MVT::i16));
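    // Note (illustrative reading): 0x2222 = 0010001000100010b; after fsmbi
    // expansion this marks byte 2 of each 32-bit word (bytes 2, 6, 10, 14),
    // the lanes where the shifted odd-byte products are merged in by the
    // SELB nodes below.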
    SDValue LoProdParts =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getNode(SPUISD::SELB, MVT::v8i16,
                              LLProd, LHProd, FSMBmask));

    SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    SDValue LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  LoProdParts,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    SDValue rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDValue rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    SDValue HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDValue HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rBH, c8)));

    SDValue HHProd =
      DAG.getNode(SPUISD::SELB, MVT::v8i16,
                  HLProd,
                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                  FSMBmask);

    SDValue HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);

    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }
  }

  return SDValue();
}
static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);
  MVT VT = Op.getValueType();

  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDValue BRcpl =
    DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                     DAG.getNode(SPUISD::FPInterp, VT, B,
                                 DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDValue AxBRcpl =
    DAG.getCopyToReg(BRcpl, VRegC,
                     DAG.getNode(ISD::FMUL, VT, A,
                                 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // What's the Chain variable do? It's magic!
  // TODO: set Chain = Op(0).getEntryNode()
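  // Note: the expression below is one Newton-Raphson refinement step. With
  // the reciprocal estimate y ~= 1/B (held in VRegBR) and q = A*y (held in
  // VRegC), the returned value is
  //   q + y*(A - B*q)
  // which cancels most of the estimate's error in the final quotient.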
  return DAG.getNode(ISD::FADD, VT,
                     DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                     DAG.getNode(ISD::FMUL, VT,
                                 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                                 DAG.getNode(ISD::FSUB, VT, A,
                                             DAG.getNode(ISD::FMUL, VT, B,
                                                         DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue ShufMask[16];
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);

  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");

  int EltNo = (int) C->getZExtValue();

  // sanity checks:
  if (VT == MVT::i8 && EltNo >= 16)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
  else if (VT == MVT::i16 && EltNo >= 8)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
  else if (VT == MVT::i32 && EltNo >= 4)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
  else if (VT == MVT::i64 && EltNo >= 2)
    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
    // i32 and i64: Element 0 is the preferred slot
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
  }

  // Need to generate shuffle mask and extract:
  int prefslot_begin = -1, prefslot_end = -1;
  int elt_byte = EltNo * VT.getSizeInBits() / 8;

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    prefslot_begin = prefslot_end = 3;
    break;
  }
  case MVT::i16: {
    prefslot_begin = 2; prefslot_end = 3;
    break;
  }
  case MVT::i32: {
    prefslot_begin = 0; prefslot_end = 3;
    break;
  }
  case MVT::i64: {
    prefslot_begin = 0; prefslot_end = 7;
    break;
  }
  }

  assert(prefslot_begin != -1 && prefslot_end != -1 &&
         "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
  for (int i = 0; i < 16; ++i) {
    // zero fill upper part of preferred slot, don't care about the
    // rest:
    unsigned int mask_val;

    if (i <= prefslot_end) {
      mask_val =
        ((i < prefslot_begin)
         ? 0x80
         : elt_byte + (i - prefslot_begin));

      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
    } else
      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
  }

  SDValue ShufMaskVec =
    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                &ShufMask[0],
                sizeof(ShufMask) / sizeof(ShufMask[0]));

  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
                                 N, N, ShufMaskVec));
}
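// Worked example (illustrative only): extracting element 1 of a v4i32
// vector gives elt_byte = 4 and preferred slot bytes 0-3, so the shuffle
// mask above becomes <4,5,6,7> repeated four times. shufb rotates the
// requested word into the preferred slot, where EXTRACT_ELT0 reads it
// directly.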
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $2 because it's always 16-byte aligned and it's available:
  SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
                VecOp,
                DAG.getNode(SPUISD::INSERT_MASK, VT,
                            DAG.getNode(ISD::ADD, PtrVT,
                                        PtrBase,
                                        DAG.getConstant(CN->getZExtValue(),
                                                        PtrVT))));

  return result;
}
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i16)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16)
            ? ISD::SIGN_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}
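// Note on the ROTL/ROTR trick above: duplicating the byte into both halves
// of a 16-bit value makes a 16-bit rotate behave like an 8-bit one.
// Illustrative example: rotl i8 0xAB by 4 expands 0xAB to ExpandArg =
// 0xABAB; a 16-bit rotate-left by 4 yields 0xBABA, and truncation returns
// 0xBA, the correct 8-bit rotation.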
static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
  MVT VT = Op.getValueType();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);

  switch (Opc) {
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");
    DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");

    unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
                       ? SPUISD::ROTBYTES_RIGHT_S
                       : SPUISD::ROTQUAD_RZ_BYTES);
    SDValue PromoteScalar =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);

    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BIT_CONVERT, VecVT,
                                   DAG.getNode(NewOpc, Op0VecVT,
                                               PromoteScalar,
                                               DAG.getConstant(4, MVT::i32))));
  }
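  // Note on the extend case above: the extension works by rotation. The
  // promoted i32 sits in bytes 0-3 of the quadword; rotating the quad right
  // by 4 bytes moves it to bytes 4-7 while either shifting in zeroes
  // (ROTQUAD_RZ_BYTES, for zero/any extend) or replicating the sign
  // (ROTBYTES_RIGHT_S, for sign extend), leaving a correctly extended i64
  // in bytes 0-7.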
  case ISD::ADD: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the carry up one register slot
    // once the carry is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

    SDValue CarryGen =
      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  CarryGen, CarryGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));
  }
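  // Note on the ADD case above: CARRY_GENERATE (cg) produces a per-word
  // carry. The shuffle mask <0x04050607, 0x80808080, 0x0c0d0e0f, 0x80808080>
  // copies word 1 into word 0 (and word 3 into word 2) while zeroing the odd
  // words (0x80 control bytes produce 0x00). Since big-endian word 0 is the
  // high half of the i64, the carry out of the low word lines up with the
  // high word before ADD_EXTENDED (addx) consumes it.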
  case ISD::SUB: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

    SDValue BorrowGen =
      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));
  }
  case ISD::SHL: {
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
    SDValue MaskLower =
      DAG.getNode(SPUISD::SELB, VecVT,
                  Op0Vec,
                  DAG.getConstant(0, VecVT),
                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
                              DAG.getConstant(0xff00ULL, MVT::i16)));
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
                                               MaskLower, ShiftAmtBytes),
                                   ShiftAmtBits));
  }
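  // Note: the SPU quadword shifters take the shift amount in two pieces, so
  // a shift by N is decomposed as N >> 3 whole bytes plus N & 7 residual
  // bits (the shlqby/shlqbi style instruction pairs); the SRL case below
  // uses the same decomposition with the rotate-and-zero forms.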
  case ISD::SRL: {
    MVT VT = Op.getValueType();
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
                                   Op0, ShiftAmtBytes),
                       ShiftAmtBits);
  }
  case ISD::SRA: {
    // Promote Op0 to vector
    SDValue Op0 =
      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftVT = ShiftAmt.getValueType();

    // Negate variable shift amounts
    if (!isa<ConstantSDNode>(ShiftAmt)) {
      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
                             DAG.getConstant(0, ShiftVT), ShiftAmt);
    }

    SDValue UpperHalfSign =
      DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
                                          Op0, DAG.getConstant(31, MVT::i32))));
    SDValue UpperHalfSignMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
    SDValue UpperLowerMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
                  DAG.getConstant(0xff00, MVT::i16));
    SDValue UpperLowerSelect =
      DAG.getNode(SPUISD::SELB, MVT::v2i64,
                  UpperHalfSignMask, Op0, UpperLowerMask);
    SDValue RotateLeftBytes =
      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
                  UpperLowerSelect, ShiftAmt);
    SDValue RotateLeftBits =
      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
                  RotateLeftBytes, ShiftAmt);

    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
                       RotateLeftBits);
  }
  }

  return SDValue();
}
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  MVT VT = Op.getValueType();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    uint64_t VectorBits[2];
    uint64_t UndefBits[2];
    uint64_t SplatBits, SplatUndef;
    int SplatSize;

    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
        && isConstantSplat(VectorBits, UndefBits,
                           VT.getVectorElementType().getSizeInBits(),
                           SplatBits, SplatUndef, SplatSize)) {
      SDValue tcVec[16];
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);

      // Turn the BUILD_VECTOR into a set of target constants:
      for (size_t i = 0; i < tcVecSize; ++i)
        tcVec[i] = tc;

      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
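// Note: rewriting the splatted operand as target constants lets the selector
// match the SPU byte-immediate forms (andbi, orbi, xorbi). For example, a
// v16i8 OR with a splat of 0x0f can become a single orbi with immediate 0x0f
// instead of a constant pool load.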
//! Lower i32 multiplication
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
                        unsigned Opc) {
  switch (VT.getSimpleVT()) {
  default:
    cerr << "CellSPU: Unknown LowerMUL value type, got "
         << Op.getValueType().getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  case MVT::i32: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);

    return DAG.getNode(ISD::ADD, MVT::i32,
                       DAG.getNode(ISD::ADD, MVT::i32,
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
  }
  }

  return SDValue();
}
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }
  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i16);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }
  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}
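// Worked example (illustrative): cntb on i32 0x01010101 produces per-byte
// counts 0x01010101. Adding the value shifted right by 16 gives 0x01010202;
// adding that shifted right by 8 gives 0x01020304, whose low byte (masked
// with 0xff) is the total population count, 4.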
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::Constant:
    return LowerConstant(Op, DAG);
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::BRCOND:
    return LowerBRCOND(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());
  // i8, i64 math ops:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
    else if (VT == MVT::i64)
      return LowerI64Math(Op, DAG, Opc);
    break;

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT.isVector())
      return LowerVectorMUL(Op, DAG);
    else if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc);
    else
      return LowerMUL(Op, DAG, VT, Opc);

  case ISD::FDIV:
    if (VT == MVT::f32 || VT == MVT::v4f32)
      return LowerFDIVf32(Op, DAG);
    // else if (Op.getValueType() == MVT::f64)
    //   return LowerFDIVf64(Op, DAG);
    else
      assert(0 && "Calling FDIV on unsupported MVT");

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  }

  return SDValue();
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);      // everything has at least one operand
  SDValue Result;                      // Initially, NULL result

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op01 = Op0.getOperand(1);
      if (Op01.getOpcode() == ISD::Constant
          || Op01.getOpcode() == ISD::TargetConstant) {
        // (add <const>, (SPUindirect <arg>, <const>)) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
                          Op0.getValueType());

        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With:    (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
        return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
                           Op0.getOperand(0), combinedConst);
      }
    } else if (isa<ConstantSDNode>(Op0)
               && Op1.getOpcode() == SPUISD::IndirectAddr) {
      SDValue Op11 = Op1.getOperand(1);
      if (Op11.getOpcode() == ISD::Constant
          || Op11.getOpcode() == ISD::TargetConstant) {
        // (add (SPUindirect <arg>, <const>), <const>) ->
        // (SPUindirect <arg>, <const + const>)
        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
        SDValue combinedConst =
          DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(),
                          Op0.getValueType());

        DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
                   << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
        DEBUG(cerr << "With:    (SPUindirect <arg>, "
                   << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");

        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
                           Op1.getOperand(0), combinedConst);
      }
    }
    break;
  }
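  // Concrete instance of the fold above (illustrative): (add (SPUindirect
  // $sp, 16), 16) collapses to (SPUindirect $sp, 32), merging two address
  // adjustments into one d-form offset.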
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
        N->getValueType(0) == Op0.getValueType()) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
      DEBUG(cerr << "Replace: ");
      DEBUG(N->dump(&DAG));
      DEBUG(cerr << "\nWith:    ");
      DEBUG(Op0.getNode()->dump(&DAG));
      DEBUG(cerr << "\n");

      Result = Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS: {
    SDValue Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1)) {
      // Kill degenerate vector shifts:
      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);

      if (CN->getZExtValue() == 0) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PROMOTE_SCALAR: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
      // <arg>
      // but only if the SPUpromote_scalar and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == N->getValueType(0)) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::EXTRACT_ELT0: {
      // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }

  return Result;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
#endif

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;

#if 0
  case CALL:
  case SHUFB:
  case INSERT_MASK:
  case CNTB:
#endif

  case SPUISD::PROMOTE_SCALAR: {
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    unsigned Op0VTBits = Op0VT.getSizeInBits();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    KnownZero |= APInt(Op0VTBits, ~InMask, false);
    KnownOne |= APInt(Op0VTBits, InMask, false);
    break;
  }

  case SPUISD::LDRESULT:
  case SPUISD::EXTRACT_ELT0:
  case SPUISD::EXTRACT_ELT0_CHAINED: {
    MVT OpVT = Op.getValueType();
    unsigned OpVTBits = OpVT.getSizeInBits();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    KnownZero |= APInt(OpVTBits, ~InMask, false);
    KnownOne |= APInt(OpVTBits, InMask, false);
    break;
  }
#if 0
  case EXTRACT_I1_ZEXT:
  case EXTRACT_I1_SEXT:
  case EXTRACT_I8_ZEXT:
  case EXTRACT_I8_SEXT:
  case MPY:
  case MPYU:
  case MPYH:
  case MPYHH:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS:
  case SPUISD::ROTBYTES_RIGHT_S:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::ROTBYTES_LEFT_CHAINED:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  case SPUISD::FPInterp:
  case SPUISD::FPRecipEst:
  case SPUISD::SEXT32TO64:
#endif
  }
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}
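// Note: the SPU local store is 256KB, so (1 << 18) bounds every byte
// address; any immediate offset in the open interval (-262144, 262143) is
// accepted here.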
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {