1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below.
// Maps SPUISD opcode values to printable names; filled lazily on the first
// call to getTargetNodeName().
std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
struct valtype_map_s {
  // Byte offset of the value's "preferred slot" within a 16-byte SPU
  // register (scalars occupy a fixed slot of the 128-bit register).
  const int prefslot_byte;
// Per-type preferred-slot table; searched linearly by
// getValueTypeMapEntry() below.
const valtype_map_s valtype_map[] = {

// Number of entries in valtype_map (classic sizeof array-length idiom).
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
//! Look up the valtype_map entry for a given value type.
/*!
  Linear scan over valtype_map; yields 0 when VT has no entry (the debug
  diagnostic below reports that case).
 */
const valtype_map_s *getValueTypeMapEntry(MVT VT) {
  const valtype_map_s *retval = 0;
  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;
  // Debug-only diagnostic for a missing table entry:
  cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
//! Configure SPU register classes, operation legality and DAG combines.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
  // Fold away setcc operations if possible.
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8, MVT::i1, Custom);
  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);
  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);
  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;
    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  // Custom lower BRCOND for i1, i8 to "promote" the result to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL, MVT::i64, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);
  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);
  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i64, Custom);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i1, Promote);
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::i1, Promote);
  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  // Zero extension and sign extension for i64 have to be
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);
  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    MVT VT = (MVT::SimpleValueType)sctype;
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): FP_TO_SINT/i64 and SINT_TO_FP/i64 were already marked
  // Custom above; these two calls are redundant but harmless.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // NOTE(review): this overrides the earlier FP_TO_UINT/i32 "Legal" setting.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);
    setOperationAction(ISD::AND , VT, Legal);
    setOperationAction(ISD::OR , VT, Legal);
    setOperationAction(ISD::XOR , VT, Legal);
    setOperationAction(ISD::LOAD , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);
    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);
    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  // v16i8 logical/multiply ops need custom handling on top of the defaults:
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(SPU::R1);
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);
  computeRegisterProperties();
//! Return the printable name for a target-specific (SPUISD) DAG opcode.
/*!
  Lazily populates the file-level node_names map on first use, then does a
  simple lookup; returns 0 for unknown opcodes.
 */
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
  return ((i != node_names.end()) ? i->second : 0);
//! Pick the value type that setcc should produce for the given operand.
MVT SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
  MVT VT = Op.getValueType();
463 //===----------------------------------------------------------------------===//
464 // Calling convention code:
465 //===----------------------------------------------------------------------===//
467 #include "SPUGenCallingConv.inc"
469 //===----------------------------------------------------------------------===//
470 // LowerOperation implementation
471 //===----------------------------------------------------------------------===//
473 /// Aligned load common code for CellSPU
475 \param[in] Op The SelectionDAG load or store operand
476 \param[in] DAG The selection DAG
477 \param[in] ST CellSPU subtarget information structure
478 \param[in,out] alignment Caller initializes this to the load or store node's
479 value from getAlignment(), may be updated while generating the aligned load
480 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
481 offset (divisible by 16, modulo 16 == 0)
482 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
483 offset of the preferred slot (modulo 16 != 0)
\param[in,out] VT Caller initializes this value type to the load or store
485 node's loaded or stored value type; may be updated if an i1-extended load or
487 \param[out] was16aligned true if the base pointer had 16-byte alignment,
488 otherwise false. Can help to determine if the chunk needs to be rotated.
490 Both load and store lowering load a block of data aligned on a 16-byte
491 boundary. This is the common aligned load code shared between both.
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();
  // base + constant offset: peel the offset off and derive the preferred
  // slot offset from its low 4 bits.
  if (basePtr.getOpcode() == ISD::ADD) {
    SDOperand Op1 = basePtr.Val->getOperand(1);
    if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);
      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);
      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDOperand APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
    prefSlotOffs = -vtm->prefslot_byte;
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    // Frame-index base: offsets are stack-slot relative to SP (R1).
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, VT);
    prefSlotOffs = -vtm->prefslot_byte;
  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    // Emit the vector load:
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
571 /// Custom lower loads for CellSPU
573 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
574 within a 16-byte block, we have to rotate to extract the requested element.
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();            // type actually in memory
  MVT OpVT = Op.Val->getValueType(0);    // type the DAG node produces
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // Load the whole 16-byte chunk containing the value:
    AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
      Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      // Non-16-byte-aligned case: rotate by the base pointer plus offset.
      MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
      LoadSDNode *LN1 = cast<LoadSDNode>(result);
      Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                           DAG.getConstant(rotamt, PtrVT));
      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      MVT vecVT = MVT::v16i8;
      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
      vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
      // Handle the sign and zero-extending loads for i1 and i8:
      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      result = DAG.getNode(NewOpC, OpVT, result);
    // Wrap the value and chain in an LDRESULT node:
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = {
    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
678 /// Custom lower stores for CellSPU
680 All CellSPU stores are aligned to 16-byte boundaries, so for elements
681 within a 16-byte block, we have to generate a shuffle to insert the
682 requested element into its place, then store the resulting block.
685 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
686 StoreSDNode *SN = cast<StoreSDNode>(Op);
687 SDOperand Value = SN->getValue();
688 MVT VT = Value.getValueType();
689 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
690 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
691 unsigned alignment = SN->getAlignment();
693 switch (SN->getAddressingMode()) {
694 case ISD::UNINDEXED: {
695 int chunk_offset, slot_offset;
698 // The vector type we really want to load from the 16-byte chunk, except
699 // in the case of MVT::i1, which has to be v16i8.
700 MVT vecVT, stVecVT = MVT::v16i8;
703 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
704 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
706 SDOperand alignLoadVec =
707 AlignedLoad(Op, DAG, ST, SN, alignment,
708 chunk_offset, slot_offset, VT, was16aligned);
710 if (alignLoadVec.Val == 0)
713 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
714 SDOperand basePtr = LN->getBasePtr();
715 SDOperand the_chain = alignLoadVec.getValue(1);
716 SDOperand theValue = SN->getValue();
720 && (theValue.getOpcode() == ISD::AssertZext
721 || theValue.getOpcode() == ISD::AssertSext)) {
722 // Drill down and get the value for zero- and sign-extended
724 theValue = theValue.getOperand(0);
729 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
730 SDOperand insertEltPtr;
731 SDOperand insertEltOp;
733 // If the base pointer is already a D-form address, then just create
734 // a new D-form address with a slot offset and the orignal base pointer.
735 // Otherwise generate a D-form address with the slot offset relative
736 // to the stack pointer, which is always aligned.
737 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
738 DEBUG(basePtr.Val->dump(&DAG));
741 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
742 (basePtr.getOpcode() == ISD::ADD
743 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
744 insertEltPtr = basePtr;
746 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
749 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
750 result = DAG.getNode(SPUISD::SHUFB, vecVT,
751 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
753 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
755 result = DAG.getStore(the_chain, result, basePtr,
756 LN->getSrcValue(), LN->getSrcValueOffset(),
757 LN->isVolatile(), LN->getAlignment());
766 case ISD::LAST_INDEXED_MODE:
767 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
769 cerr << (unsigned) SN->getAddressingMode() << "\n";
777 /// Generate the address of a constant pool entry.
//! Materialize the address of a constant-pool entry (static reloc only).
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
      // Large-memory model: split the address into Hi/Lo halves.
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
  "LowerConstantPool: Relocation model other than static not supported.");
//! Materialize the address of a jump table (static reloc only).
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Small-memory model: A-form absolute address suffices.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
      // Large-memory model: split the address into Hi/Lo halves.
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
  "LowerJumpTable: Relocation model other than static not supported.");
//! Materialize the address of a global value (static reloc only).
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Small-memory model: A-form absolute address suffices.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
      // Large-memory model: split the address into Hi/Lo halves.
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
  cerr << "LowerGlobalAddress: Relocation model other than static not "
853 //! Custom lower i64 integer constants
855 This code inserts all of the necessary juggling that needs to occur to load
856 a 64-bit constant into a register.
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
  if (VT == MVT::i64) {
    // Splat the constant into a v2i64 and extract element 0: this is how a
    // 64-bit immediate is materialized into a register on SPU.
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
  // Any other type is unexpected here:
  cerr << "LowerConstant: unhandled constant type "
878 //! Custom lower double precision floating point constants
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
  "LowerConstantFP: Node is not ConstantFPSDNode");
  if (VT == MVT::f64) {
    // Reuse the i64 constant-lowering path by bit-converting the double's
    // raw bits back to f64.
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
896 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
  SDOperand Cond = Op.getOperand(1);
  MVT CondVT = Cond.getValueType();
  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
    // Promote the condition (i1 -> i32, i8 -> i16) so the SPU branch can
    // consume it; zero-extension preserves the boolean value.
    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
                       DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
  return SDOperand(); // Unchanged
915 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
// Lowers incoming formal arguments: early arguments arrive in SPU argument
// registers (copied into fresh virtual registers of the matching register
// class); later arguments are loaded from fixed stack slots just past the
// minimal frame.  Also spills remaining arg registers for varargs.
917 MachineFunction &MF = DAG.getMachineFunction();
918 MachineFrameInfo *MFI = MF.getFrameInfo();
919 MachineRegisterInfo &RegInfo = MF.getRegInfo();
920 SmallVector<SDOperand, 8> ArgValues;
921 SDOperand Root = Op.getOperand(0);
922 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
924 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
925 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Argument spill area starts just past the minimal frame ([LR]/[SP]).
927 unsigned ArgOffset = SPUFrameInfo::minStackSize();
928 unsigned ArgRegIdx = 0;
929 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
931 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
933 // Add DAG nodes to load the arguments or copy them out of registers.
// The last result of FORMAL_ARGUMENTS is the chain, hence NumValues-1.
934 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
936 bool needsLoad = false;
937 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
938 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
940 switch (ObjectVT.getSimpleVT()) {
942 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
943 << ObjectVT.getMVTString()
// Each case below selects the register class matching the argument type.
// NOTE(review): register passing appears to be skipped entirely for
// varargs functions (!isVarArg) -- confirm this matches the SPU ABI.
948 if (!isVarArg && ArgRegIdx < NumArgRegs) {
949 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
950 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
951 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
958 if (!isVarArg && ArgRegIdx < NumArgRegs) {
959 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
960 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
961 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
968 if (!isVarArg && ArgRegIdx < NumArgRegs) {
969 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
970 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
971 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
978 if (!isVarArg && ArgRegIdx < NumArgRegs) {
979 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
980 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
981 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
988 if (!isVarArg && ArgRegIdx < NumArgRegs) {
989 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
990 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
991 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
998 if (!isVarArg && ArgRegIdx < NumArgRegs) {
999 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1000 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1001 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// All vector argument types land in the unified vector register class.
1013 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1014 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1015 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1016 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1024 // We need to load the argument to a virtual register if we determined above
1025 // that we ran out of physical registers of the appropriate type
1027 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1028 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1029 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1030 ArgOffset += StackSlotSize;
1033 ArgValues.push_back(ArgVal);
1036 // If the function takes variable number of arguments, make a frame index for
1037 // the start of the first vararg value... for expansion of llvm.va_start.
1039 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1041 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1042 // If this function is vararg, store any remaining integer argument regs to
1043 // their spots on the stack so that they may be loaded by deferencing the
1044 // result of va_next.
1045 SmallVector<SDOperand, 8> MemOps;
1046 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1047 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1048 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1049 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1050 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1051 MemOps.push_back(Store);
1052 // Increment the address by four for the next argument to store
1053 SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1054 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Chain the vararg spill stores together so they cannot be reordered away.
1056 if (!MemOps.empty())
1057 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1060 ArgValues.push_back(Root);
1062 // Return the new list of results.
1063 return DAG.getMergeValues(Op.Val->getVTList(), &ArgValues[0],
1067 /// isLSAAddress - Return the immediate to use if the specified
1068 /// value is representable as a LSA address.
1069 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1070 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1073 int Addr = C->getValue();
// A valid local-store address must be word-aligned and fit in a 16-bit
// sign-extended word offset (the `Addr << 14 >> 14` round-trip test).
1074 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1075 (Addr << 14 >> 14) != Addr)
1076 return 0; // Top 14 bits have to be sext of immediate.
// Return the word-index form of the address (byte address / 4).
1078 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
1083 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Lowers an outgoing call: marshals arguments into SPU argument registers
// and/or stack slots, selects the call addressing form (PC-relative BRSL,
// A-form BRASL, or indirect for large-memory mode), emits the call
// sequence, and copies return values out of R3 (and R4 for expanded i64).
1084 SDOperand Chain = Op.getOperand(0);
1086 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1087 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1089 SDOperand Callee = Op.getOperand(4);
// Arguments come in (value, flag) pairs after the five fixed operands.
1090 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1091 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1092 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1093 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1095 // Handy pointer type
1096 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1098 // Accumulate how many bytes are to be pushed on the stack, including the
1099 // linkage area, and parameter passing area. According to the SPU ABI,
1100 // we minimally need space for [LR] and [SP]
1101 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1103 // Set up a copy of the stack pointer for use loading and storing any
1104 // arguments that may not fit in the registers available for argument
1106 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1108 // Figure out which arguments are going to go in registers, and which in
1110 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1111 unsigned ArgRegIdx = 0;
1113 // Keep track of registers passing arguments
1114 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1115 // And the arguments passed on the stack
1116 SmallVector<SDOperand, 8> MemOpChains;
1118 for (unsigned i = 0; i != NumOps; ++i) {
1119 SDOperand Arg = Op.getOperand(5+2*i);
1121 // PtrOff will be used to store the current argument to the stack if a
1122 // register cannot be found for it.
1123 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1124 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1126 switch (Arg.getValueType().getSimpleVT()) {
1127 default: assert(0 && "Unexpected ValueType for argument!");
// Integer, FP and vector arguments all follow the same pattern: use the
// next available argument register, otherwise spill to a stack slot.
1131 if (ArgRegIdx != NumArgRegs) {
1132 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1134 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1135 ArgOffset += StackSlotSize;
1140 if (ArgRegIdx != NumArgRegs) {
1141 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1143 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1144 ArgOffset += StackSlotSize;
1151 if (ArgRegIdx != NumArgRegs) {
1152 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1154 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1155 ArgOffset += StackSlotSize;
1161 // Update number of stack bytes actually used, insert a call sequence start
1162 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1163 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1165 if (!MemOpChains.empty()) {
1166 // Adjust the stack pointer for the stack arguments.
1167 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1168 &MemOpChains[0], MemOpChains.size());
1171 // Build a sequence of copy-to-reg nodes chained together with token chain
1172 // and flag operands which copy the outgoing args into the appropriate regs.
1174 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1175 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1177 InFlag = Chain.getValue(1);
1180 SmallVector<SDOperand, 8> Ops;
1181 unsigned CallOpc = SPUISD::CALL;
1183 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1184 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1185 // node so that legalize doesn't hack it.
1186 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1187 GlobalValue *GV = G->getGlobal();
1188 MVT CalleeVT = Callee.getValueType();
1189 SDOperand Zero = DAG.getConstant(0, PtrVT);
1190 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1192 if (!ST->usingLargeMem()) {
1193 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1194 // style calls, otherwise, external symbols are BRASL calls. This assumes
1195 // that declared/defined symbols are in the same compilation unit and can
1196 // be reached through PC-relative jumps.
1199 // This may be an unsafe assumption for JIT and really large compilation
1201 if (GV->isDeclaration()) {
1202 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1204 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1207 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1209 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1211 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1212 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1213 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1214 // If this is an absolute destination address that appears to be a legal
1215 // local store address, use the munged value.
1216 Callee = SDOperand(Dest, 0);
1219 Ops.push_back(Chain);
1220 Ops.push_back(Callee);
1222 // Add argument registers to the end of the list so that they are known live
1224 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1225 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1226 RegsToPass[i].second.getValueType()));
1229 Ops.push_back(InFlag);
1230 // Returns a chain and a flag for retval copy to use.
1231 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1232 &Ops[0], Ops.size());
1233 InFlag = Chain.getValue(1);
1235 Chain = DAG.getCALLSEQ_END(Chain,
1236 DAG.getConstant(NumStackBytes, PtrVT),
1237 DAG.getConstant(0, PtrVT),
1239 if (Op.Val->getValueType(0) != MVT::Other)
1240 InFlag = Chain.getValue(1);
1242 SDOperand ResultVals[3];
1243 unsigned NumResults = 0;
1245 // If the call has results, copy the values out of the ret val registers.
1246 switch (Op.Val->getValueType(0).getSimpleVT()) {
1247 default: assert(0 && "Unexpected ret value!");
1248 case MVT::Other: break;
// Two i32 results means an i64 return expanded into a register pair:
// R4 holds one half, R3 the other, glued so they stay adjacent.
1250 if (Op.Val->getValueType(1) == MVT::i32) {
1251 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1252 ResultVals[0] = Chain.getValue(0);
1253 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1254 Chain.getValue(2)).getValue(1);
1255 ResultVals[1] = Chain.getValue(0);
1258 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1259 ResultVals[0] = Chain.getValue(0);
1264 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1265 ResultVals[0] = Chain.getValue(0);
// Scalar FP and vector results are also returned in R3 with their own VT.
1270 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1271 InFlag).getValue(1);
1272 ResultVals[0] = Chain.getValue(0);
1280 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1281 InFlag).getValue(1);
1282 ResultVals[0] = Chain.getValue(0);
1287 // If the function returns void, just return the chain.
1288 if (NumResults == 0)
1291 // Otherwise, merge everything together with a MERGE_VALUES node.
1292 ResultVals[NumResults++] = Chain;
1293 SDOperand Res = DAG.getMergeValues(ResultVals, NumResults);
1294 return Res.getValue(Op.ResNo);
1298 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
// Lowers ISD::RET: runs the return-value calling convention (RetCC_SPU),
// records the return registers as function live-outs, copies each returned
// value into its assigned register, then emits SPUISD::RET_FLAG.
1299 SmallVector<CCValAssign, 16> RVLocs;
1300 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1301 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1302 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1303 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1305 // If this is the first return lowered for this function, add the regs to the
1306 // liveout set for the function.
1307 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1308 for (unsigned i = 0; i != RVLocs.size(); ++i)
1309 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1312 SDOperand Chain = Op.getOperand(0);
1315 // Copy the result values into the output registers.
1316 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1317 CCValAssign &VA = RVLocs[i];
1318 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, val0, flag0, val1, flag1, ...), hence i*2+1.
// Each copy is glued to the next via Flag so they stay contiguous.
1319 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1320 Flag = Chain.getValue(1);
1324 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1326 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1330 //===----------------------------------------------------------------------===//
1331 // Vector related lowering:
1332 //===----------------------------------------------------------------------===//
1334 static ConstantSDNode *
1335 getVecImm(SDNode *N) {
// Returns the ConstantSDNode when build-vector N is a splat of one constant
// value (undef elements are ignored); returns 0 otherwise.
1336 SDOperand OpVal(0, 0);
1338 // Check to see if this buildvec has a single non-undef value in its elements.
1339 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1340 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1342 OpVal = N->getOperand(i);
1343 else if (OpVal != N->getOperand(i))
1347 if (OpVal.Val != 0) {
1348 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1353 return 0; // All UNDEF: use implicit def.; not Constant node
1356 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1357 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1359 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1361 if (ConstantSDNode *CN = getVecImm(N)) {
1362 uint64_t Value = CN->getValue();
1363 if (ValueType == MVT::i64) {
// For i64 splats the two 32-bit halves must agree; reduce to one half.
1364 uint64_t UValue = CN->getValue();
1365 uint32_t upper = uint32_t(UValue >> 32);
1366 uint32_t lower = uint32_t(UValue);
1369 Value = Value >> 32;
// ILA-class instructions take an unsigned 18-bit immediate (<= 0x3ffff).
1371 if (Value <= 0x3ffff)
1372 return DAG.getConstant(Value, ValueType);
1378 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1379 /// and the value fits into a signed 16-bit constant, and if so, return the
1381 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1383 if (ConstantSDNode *CN = getVecImm(N)) {
1384 int64_t Value = CN->getSignExtended();
1385 if (ValueType == MVT::i64) {
// For i64 splats the two 32-bit halves must agree; reduce to one half.
1386 uint64_t UValue = CN->getValue();
1387 uint32_t upper = uint32_t(UValue >> 32);
1388 uint32_t lower = uint32_t(UValue);
1391 Value = Value >> 32;
// Signed 16-bit immediate range: [-32768, 32767] (IL-class instructions).
1393 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1394 return DAG.getConstant(Value, ValueType);
1401 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1402 /// and the value fits into a signed 10-bit constant, and if so, return the
1404 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1406 if (ConstantSDNode *CN = getVecImm(N)) {
1407 int64_t Value = CN->getSignExtended();
1408 if (ValueType == MVT::i64) {
// For i64 splats the two 32-bit halves must agree; reduce to one half.
1409 uint64_t UValue = CN->getValue();
1410 uint32_t upper = uint32_t(UValue >> 32);
1411 uint32_t lower = uint32_t(UValue);
1414 Value = Value >> 32;
// isS10Constant checks the signed 10-bit range used by I10-form opcodes.
1416 if (isS10Constant(Value))
1417 return DAG.getConstant(Value, ValueType);
1423 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1424 /// and the value fits into a signed 8-bit constant, and if so, return the
1427 /// @note: The incoming vector is v16i8 because that's the only way we can load
1428 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
1430 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1432 if (ConstantSDNode *CN = getVecImm(N)) {
1433 int Value = (int) CN->getValue();
1434 if (ValueType == MVT::i16
1435 && Value <= 0xffff /* truncated from uint64_t */
// NOTE(review): the (short) shift is arithmetic, so the upper byte is
// sign-extended before being compared against the masked lower byte --
// confirm this is the intended "both bytes equal" test for negatives.
1436 && ((short) Value >> 8) == ((short) Value & 0xff))
1437 return DAG.getConstant(Value & 0xff, ValueType);
1438 else if (ValueType == MVT::i8
1439 && (Value & 0xff) == Value)
1440 return DAG.getConstant(Value, ValueType);
1446 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1447 /// and the value fits into a signed 16-bit constant, and if so, return the
1449 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1451 if (ConstantSDNode *CN = getVecImm(N)) {
1452 uint64_t Value = CN->getValue();
// Matches values whose low 16 bits are zero, i.e. loadable with ILHU
// ("immediate load halfword upper"); returns the upper halfword.
// NOTE(review): the i64 test masks only with 0xffff0000, so any set bits
// above bit 31 reject the match -- confirm that is the intent.
1453 if ((ValueType == MVT::i32
1454 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1455 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1456 return DAG.getConstant(Value >> 16, ValueType);
1462 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1463 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1464 if (ConstantSDNode *CN = getVecImm(N)) {
// Truncation to unsigned is harmless here: only 32 bits are kept anyway.
1465 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1471 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1472 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1473 if (ConstantSDNode *CN = getVecImm(N)) {
1474 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1480 // If this is a vector of constants or undefs, get the bits. A bit in
1481 // UndefBits is set if the corresponding element of the vector is an
1482 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1483 // zero. Return true if this is not an array of constants, false if it is.
1485 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1486 uint64_t UndefBits[2]) {
1487 // Start with zero'd results.
1488 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1490 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1491 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1492 SDOperand OpVal = BV->getOperand(i);
// PartNo selects which uint64_t half of the 128-bit vector this element
// belongs to; SlotNo is its position within that uint64_t.
1494 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1495 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1497 uint64_t EltBits = 0;
1498 if (OpVal.getOpcode() == ISD::UNDEF) {
// Record the whole element as undef in the matching bit positions.
1499 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1500 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1502 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1503 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1504 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements contribute their raw bit pattern.
1505 const APFloat &apf = CN->getValueAPF();
1506 EltBits = (CN->getValueType(0) == MVT::f32
1507 ? FloatToBits(apf.convertToFloat())
1508 : DoubleToBits(apf.convertToDouble()));
1510 // Nonconstant element.
1514 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1517 //printf("%llx %llx %llx %llx\n",
1518 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1522 /// If this is a splat (repetition) of a value across the whole vector, return
1523 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1524 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1525 /// SplatSize = 1 byte.
1526 static bool isConstantSplat(const uint64_t Bits128[2],
1527 const uint64_t Undef128[2],
1529 uint64_t &SplatBits, uint64_t &SplatUndef,
1531 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1532 // the same as the lower 64-bits, ignoring undefs.
// Fold each width in half: OR-merge value bits (undef positions are zero),
// AND-merge undef masks (a bit is undef only if undef in both halves).
1533 uint64_t Bits64 = Bits128[0] | Bits128[1];
1534 uint64_t Undef64 = Undef128[0] & Undef128[1];
1535 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1536 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1537 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1538 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Compare halves with the opposite half's undef mask cleared so that an
// undef lane never causes a mismatch.  Descend only while the caller's
// MinSplatBits permits a narrower splat.
1540 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1541 if (MinSplatBits < 64) {
1543 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1545 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1546 if (MinSplatBits < 32) {
1548 // If the top 16-bits are different than the lower 16-bits, ignoring
1549 // undefs, we have an i32 splat.
1550 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1551 if (MinSplatBits < 16) {
1552 // If the top 8-bits are different than the lower 8-bits, ignoring
1553 // undefs, we have an i16 splat.
1554 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1555 // Otherwise, we have an 8-bit splat.
1556 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1557 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1563 SplatUndef = Undef16;
1570 SplatUndef = Undef32;
1576 SplatBits = Bits128[0];
1577 SplatUndef = Undef128[0];
1583 return false; // Can't be a splat if two pieces don't match.
1586 // If this is a case we can't handle, return null and let the default
1587 // expansion code take care of it. If we CAN select this case, and if it
1588 // selects to a single instruction, return Op. Otherwise, if we can codegen
1589 // this case more efficiently than a constant pool load, lower it to the
1590 // sequence of ops that should be used.
1591 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1592 MVT VT = Op.getValueType();
1593 // If this is a vector of constants or undefs, get the bits. A bit in
1594 // UndefBits is set if the corresponding element of the vector is an
1595 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1597 uint64_t VectorBits[2];
1598 uint64_t UndefBits[2];
1599 uint64_t SplatBits, SplatUndef;
1601 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1602 || !isConstantSplat(VectorBits, UndefBits,
1603 VT.getVectorElementType().getSizeInBits(),
1604 SplatBits, SplatUndef, SplatSize))
1605 return SDOperand(); // Not a constant vector, not a splat.
1607 switch (VT.getSimpleVT()) {
1610 uint32_t Value32 = SplatBits;
1611 assert(SplatSize == 4
1612 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1613 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1614 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1615 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1616 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1620 uint64_t f64val = SplatBits;
1621 assert(SplatSize == 8
1622 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1623 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1624 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1625 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1626 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1630 // 8-bit constants have to be expanded to 16-bits
1631 unsigned short Value16 = SplatBits | (SplatBits << 8);
1633 for (int i = 0; i < 8; ++i)
1634 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1635 return DAG.getNode(ISD::BIT_CONVERT, VT,
1636 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// For v8i16, a byte-sized splat (SplatSize == 1) is widened by
// duplicating the byte into both halves of the halfword.
1639 unsigned short Value16;
1641 Value16 = (unsigned short) (SplatBits & 0xffff);
1643 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1644 SDOperand T = DAG.getConstant(Value16, VT.getVectorElementType());
1646 for (int i = 0; i < 8; ++i) Ops[i] = T;
1647 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1650 unsigned int Value = SplatBits;
1651 SDOperand T = DAG.getConstant(Value, VT.getVectorElementType());
1652 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1655 uint64_t val = SplatBits;
1656 uint32_t upper = uint32_t(val >> 32);
1657 uint32_t lower = uint32_t(val);
1659 if (upper == lower) {
1660 // Magic constant that can be matched by IL, ILA, et. al.
1661 SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1662 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1666 SmallVector<SDOperand, 16> ShufBytes;
1668 bool upper_special, lower_special;
1670 // NOTE: This code creates common-case shuffle masks that can be easily
1671 // detected as common expressions. It is not attempting to create highly
1672 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1674 // Detect if the upper or lower half is a special shuffle mask pattern:
1675 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1676 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1678 // Create lower vector if not a special pattern
1679 if (!lower_special) {
1680 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1681 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1682 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1683 LO32C, LO32C, LO32C, LO32C));
1686 // Create upper vector if not a special pattern
1687 if (!upper_special) {
1688 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1689 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1690 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1691 HI32C, HI32C, HI32C, HI32C));
1694 // If either upper or lower are special, then the two input operands are
1695 // the same (basically, one of them is a "don't care")
1700 if (lower_special && upper_special) {
1701 // Unhappy situation... both upper and lower are special, so punt with
1702 // a target constant:
1703 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1704 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte shufb control word, one 32-bit lane at a time.  Even
// lanes (i & 1) == 0 take bytes from the upper half, odd lanes from the
// lower half; special patterns are encoded with shufb's generator bytes.
1708 for (int i = 0; i < 4; ++i) {
1710 for (int j = 0; j < 4; ++j) {
1712 bool process_upper, process_lower;
1714 process_upper = (upper_special && (i & 1) == 0);
1715 process_lower = (lower_special && (i & 1) == 1);
1717 if (process_upper || process_lower) {
1718 if ((process_upper && upper == 0)
1719 || (process_lower && lower == 0))
1721 else if ((process_upper && upper == 0xffffffff)
1722 || (process_lower && lower == 0xffffffff))
1724 else if ((process_upper && upper == 0x80000000)
1725 || (process_lower && lower == 0x80000000))
1726 val |= (j == 0 ? 0xe0 : 0x80);
1728 val |= i * 4 + j + ((i & 1) * 16);
1731 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1734 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1735 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1736 &ShufBytes[0], ShufBytes.size()));
1744 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1745 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1746 /// permutation vector, V3, is monotonically increasing with one "exception"
1747 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1748 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1749 /// In either case, the net result is going to eventually invoke SHUFB to
1750 /// permute/shuffle the bytes from V1 and V2.
1752 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1753 /// control word for byte/halfword/word insertion. This takes care of a single
1754 /// element move from V2 into V1.
1756 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1757 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1758 SDOperand V1 = Op.getOperand(0);
1759 SDOperand V2 = Op.getOperand(1);
1760 SDOperand PermMask = Op.getOperand(2);
1762 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1764 // If we have a single element being moved from V1 to V2, this can be handled
1765 // using the C*[DX] compute mask instructions, but the vector elements have
1766 // to be monotonically increasing with one exception element.
1767 MVT EltVT = V1.getValueType().getVectorElementType();
1768 unsigned EltsFromV2 = 0;
// V2EltIdx0 is the mask index where V2's elements begin (= element count
// of V1): 16 for v16i8, 8 for v8i16, 4 for v4i32.
1770 unsigned V2EltIdx0 = 0;
1771 unsigned CurrElt = 0;
1772 bool monotonic = true;
1773 if (EltVT == MVT::i8)
1775 else if (EltVT == MVT::i16)
1777 else if (EltVT == MVT::i32)
1780 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: bail out of the fast path as soon as more than one
// element comes from V2 or the V1 indices stop increasing monotonically.
1782 for (unsigned i = 0, e = PermMask.getNumOperands();
1783 EltsFromV2 <= 1 && monotonic && i != e;
1786 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1789 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1791 if (SrcElt >= V2EltIdx0) {
// NOTE(review): (V2EltIdx0 - SrcElt) is non-positive when
// SrcElt >= V2EltIdx0 -- this byte offset looks inverted
// (expected (SrcElt - V2EltIdx0) << 2); confirm against the
// C*D control-word semantics before relying on it.
1793 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1794 } else if (CurrElt != SrcElt) {
1801 if (EltsFromV2 == 1 && monotonic) {
1802 // Compute mask and shuffle
1803 MachineFunction &MF = DAG.getMachineFunction();
1804 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1805 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1806 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1807 // Initialize temporary register to 0
1808 SDOperand InitTempReg =
1809 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1810 // Copy register's contents as index in INSERT_MASK:
1811 SDOperand ShufMaskOp =
1812 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1813 DAG.getTargetConstant(V2Elt, MVT::i32),
1814 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1815 // Use shuffle mask in SHUFB synthetic instruction:
1816 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1818 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1819 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1821 SmallVector<SDOperand, 16> ResultMask;
1822 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1824 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1827 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1829 for (unsigned j = 0; j < BytesPerElement; ++j) {
1830 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1835 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1836 &ResultMask[0], ResultMask.size());
1837 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1841 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// Lowers SCALAR_TO_VECTOR: constants become an explicit constant splat
// BUILD_VECTOR (which later simplifies to a vector register load); all
// other scalars are promoted with SPUISD::PROMOTE_SCALAR.
1842 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1844 if (Op0.Val->getOpcode() == ISD::Constant) {
1845 // For a constant, build the appropriate constant vector, which will
1846 // eventually simplify to a vector register load.
1848 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1849 SmallVector<SDOperand, 16> ConstVecValues;
1853 // Create a constant vector:
1854 switch (Op.getValueType().getSimpleVT()) {
1855 default: assert(0 && "Unexpected constant value type in "
1856 "LowerSCALAR_TO_VECTOR");
1857 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1858 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1859 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1860 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1861 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1862 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element of the vector.
1865 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1866 for (size_t j = 0; j < n_copies; ++j)
1867 ConstVecValues.push_back(CValue);
1869 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1870 &ConstVecValues[0], ConstVecValues.size());
1872 // Otherwise, copy the value from one register to another:
1873 switch (Op0.getValueType().getSimpleVT()) {
1874 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1881 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1888 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1889 switch (Op.getValueType().getSimpleVT()) {
1891 cerr << "CellSPU: Unknown vector multiplication, got "
1892 << Op.getValueType().getMVTString()
1898 SDOperand rA = Op.getOperand(0);
1899 SDOperand rB = Op.getOperand(1);
1900 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1901 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1902 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1903 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1905 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1909 // Multiply two v8i16 vectors (pipeline friendly version):
1910 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1911 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1912 // c) Use SELB to select upper and lower halves from the intermediate results
1914 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1915 // dual-issue. This code does manage to do this, even if it's a little on
1918 MachineFunction &MF = DAG.getMachineFunction();
1919 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1920 SDOperand Chain = Op.getOperand(0);
1921 SDOperand rA = Op.getOperand(0);
1922 SDOperand rB = Op.getOperand(1);
1923 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1924 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1927 DAG.getCopyToReg(Chain, FSMBIreg,
1928 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1929 DAG.getConstant(0xcccc, MVT::i16)));
1932 DAG.getCopyToReg(FSMBOp, HiProdReg,
1933 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1935 SDOperand HHProd_v4i32 =
1936 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1937 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1939 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1940 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1941 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1942 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1944 DAG.getConstant(16, MVT::i16))),
1945 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1948 // This M00sE is N@stI! (apologies to Monty Python)
1950 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1951 // is to break it all apart, sign extend, and reassemble the various
1952 // intermediate products.
1954 SDOperand rA = Op.getOperand(0);
1955 SDOperand rB = Op.getOperand(1);
1956 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1957 SDOperand c16 = DAG.getConstant(16, MVT::i32);
1960 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1961 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1962 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1964 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1966 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1969 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1970 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1972 SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1973 DAG.getConstant(0x2222, MVT::i16));
1975 SDOperand LoProdParts =
1976 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1977 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1978 LLProd, LHProd, FSMBmask));
1980 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1983 DAG.getNode(ISD::AND, MVT::v4i32,
1985 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1986 LoProdMask, LoProdMask,
1987 LoProdMask, LoProdMask));
1990 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1991 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1994 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1995 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1998 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1999 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2000 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2002 SDOperand HHProd_1 =
2003 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2004 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2005 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2006 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2007 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2010 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2012 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2016 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2018 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2019 DAG.getNode(ISD::OR, MVT::v4i32,
2027 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2028 MachineFunction &MF = DAG.getMachineFunction();
2029 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2031 SDOperand A = Op.getOperand(0);
2032 SDOperand B = Op.getOperand(1);
2033 MVT VT = Op.getValueType();
2035 unsigned VRegBR, VRegC;
2037 if (VT == MVT::f32) {
2038 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2039 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2041 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2042 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2044 // TODO: make sure we're feeding FPInterp the right arguments
2045 // Right now: fi B, frest(B)
2048 // (Floating Interpolate (FP Reciprocal Estimate B))
2050 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2051 DAG.getNode(SPUISD::FPInterp, VT, B,
2052 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2054 // Computes A * BRcpl and stores in a temporary register
2056 DAG.getCopyToReg(BRcpl, VRegC,
2057 DAG.getNode(ISD::FMUL, VT, A,
2058 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2059 // What's the Chain variable do? It's magic!
2060 // TODO: set Chain = Op(0).getEntryNode()
2062 return DAG.getNode(ISD::FADD, VT,
2063 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2064 DAG.getNode(ISD::FMUL, VT,
2065 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2066 DAG.getNode(ISD::FSUB, VT, A,
2067 DAG.getNode(ISD::FMUL, VT, B,
2068 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2071 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2072 MVT VT = Op.getValueType();
2073 SDOperand N = Op.getOperand(0);
2074 SDOperand Elt = Op.getOperand(1);
2075 SDOperand ShufMask[16];
2076 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2078 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2080 int EltNo = (int) C->getValue();
2083 if (VT == MVT::i8 && EltNo >= 16)
2084 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2085 else if (VT == MVT::i16 && EltNo >= 8)
2086 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2087 else if (VT == MVT::i32 && EltNo >= 4)
2088 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2089 else if (VT == MVT::i64 && EltNo >= 2)
2090 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2092 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2093 // i32 and i64: Element 0 is the preferred slot
2094 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2097 // Need to generate shuffle mask and extract:
2098 int prefslot_begin = -1, prefslot_end = -1;
2099 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2101 switch (VT.getSimpleVT()) {
2103 assert(false && "Invalid value type!");
2105 prefslot_begin = prefslot_end = 3;
2109 prefslot_begin = 2; prefslot_end = 3;
2113 prefslot_begin = 0; prefslot_end = 3;
2117 prefslot_begin = 0; prefslot_end = 7;
2122 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2123 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2125 for (int i = 0; i < 16; ++i) {
2126 // zero fill uppper part of preferred slot, don't care about the
2128 unsigned int mask_val;
2130 if (i <= prefslot_end) {
2132 ((i < prefslot_begin)
2134 : elt_byte + (i - prefslot_begin));
2136 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2138 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2141 SDOperand ShufMaskVec =
2142 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2144 sizeof(ShufMask) / sizeof(ShufMask[0]));
2146 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2147 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2148 N, N, ShufMaskVec));
2152 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2153 SDOperand VecOp = Op.getOperand(0);
2154 SDOperand ValOp = Op.getOperand(1);
2155 SDOperand IdxOp = Op.getOperand(2);
2156 MVT VT = Op.getValueType();
2158 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2159 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2161 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2162 // Use $2 because it's always 16-byte aligned and it's available:
2163 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2166 DAG.getNode(SPUISD::SHUFB, VT,
2167 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2169 DAG.getNode(SPUISD::INSERT_MASK, VT,
2170 DAG.getNode(ISD::ADD, PtrVT,
2172 DAG.getConstant(CN->getValue(),
2178 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2180 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2182 assert(Op.getValueType() == MVT::i8);
2185 assert(0 && "Unhandled i8 math operator");
2189 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2191 SDOperand N1 = Op.getOperand(1);
2192 N0 = (N0.getOpcode() != ISD::Constant
2193 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2194 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2195 N1 = (N1.getOpcode() != ISD::Constant
2196 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2197 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2198 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2199 DAG.getNode(Opc, MVT::i16, N0, N1));
2203 SDOperand N1 = Op.getOperand(1);
2205 N0 = (N0.getOpcode() != ISD::Constant
2206 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2207 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2208 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2209 N1 = (N1.getOpcode() != ISD::Constant
2210 ? DAG.getNode(N1Opc, MVT::i16, N1)
2211 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2212 SDOperand ExpandArg =
2213 DAG.getNode(ISD::OR, MVT::i16, N0,
2214 DAG.getNode(ISD::SHL, MVT::i16,
2215 N0, DAG.getConstant(8, MVT::i16)));
2216 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2217 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2221 SDOperand N1 = Op.getOperand(1);
2223 N0 = (N0.getOpcode() != ISD::Constant
2224 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2225 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2226 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2227 N1 = (N1.getOpcode() != ISD::Constant
2228 ? DAG.getNode(N1Opc, MVT::i16, N1)
2229 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2230 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2231 DAG.getNode(Opc, MVT::i16, N0, N1));
2234 SDOperand N1 = Op.getOperand(1);
2236 N0 = (N0.getOpcode() != ISD::Constant
2237 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2238 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2239 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2240 N1 = (N1.getOpcode() != ISD::Constant
2241 ? DAG.getNode(N1Opc, MVT::i16, N1)
2242 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2243 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2244 DAG.getNode(Opc, MVT::i16, N0, N1));
2247 SDOperand N1 = Op.getOperand(1);
2249 N0 = (N0.getOpcode() != ISD::Constant
2250 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2251 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2252 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2253 N1 = (N1.getOpcode() != ISD::Constant
2254 ? DAG.getNode(N1Opc, MVT::i16, N1)
2255 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2256 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2257 DAG.getNode(Opc, MVT::i16, N0, N1));
2265 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2267 MVT VT = Op.getValueType();
2268 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2270 SDOperand Op0 = Op.getOperand(0);
2273 case ISD::ZERO_EXTEND:
2274 case ISD::SIGN_EXTEND:
2275 case ISD::ANY_EXTEND: {
2276 MVT Op0VT = Op0.getValueType();
2277 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2279 assert(Op0VT == MVT::i32
2280 && "CellSPU: Zero/sign extending something other than i32");
2281 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2283 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2284 ? SPUISD::ROTBYTES_RIGHT_S
2285 : SPUISD::ROTQUAD_RZ_BYTES);
2286 SDOperand PromoteScalar =
2287 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2289 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2290 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2291 DAG.getNode(NewOpc, Op0VecVT,
2293 DAG.getConstant(4, MVT::i32))));
2297 // Turn operands into vectors to satisfy type checking (shufb works on
2300 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2302 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2303 SmallVector<SDOperand, 16> ShufBytes;
2305 // Create the shuffle mask for "rotating" the borrow up one register slot
2306 // once the borrow is generated.
2307 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2308 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2309 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2310 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2312 SDOperand CarryGen =
2313 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2314 SDOperand ShiftedCarry =
2315 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2317 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2318 &ShufBytes[0], ShufBytes.size()));
2320 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2321 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2322 Op0, Op1, ShiftedCarry));
2326 // Turn operands into vectors to satisfy type checking (shufb works on
2329 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2331 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2332 SmallVector<SDOperand, 16> ShufBytes;
2334 // Create the shuffle mask for "rotating" the borrow up one register slot
2335 // once the borrow is generated.
2336 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2337 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2338 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2339 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2341 SDOperand BorrowGen =
2342 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2343 SDOperand ShiftedBorrow =
2344 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2345 BorrowGen, BorrowGen,
2346 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2347 &ShufBytes[0], ShufBytes.size()));
2349 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2350 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2351 Op0, Op1, ShiftedBorrow));
2355 SDOperand ShiftAmt = Op.getOperand(1);
2356 MVT ShiftAmtVT = ShiftAmt.getValueType();
2357 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2358 SDOperand MaskLower =
2359 DAG.getNode(SPUISD::SELB, VecVT,
2361 DAG.getConstant(0, VecVT),
2362 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2363 DAG.getConstant(0xff00ULL, MVT::i16)));
2364 SDOperand ShiftAmtBytes =
2365 DAG.getNode(ISD::SRL, ShiftAmtVT,
2367 DAG.getConstant(3, ShiftAmtVT));
2368 SDOperand ShiftAmtBits =
2369 DAG.getNode(ISD::AND, ShiftAmtVT,
2371 DAG.getConstant(7, ShiftAmtVT));
2373 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2374 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2375 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2376 MaskLower, ShiftAmtBytes),
2381 MVT VT = Op.getValueType();
2382 SDOperand ShiftAmt = Op.getOperand(1);
2383 MVT ShiftAmtVT = ShiftAmt.getValueType();
2384 SDOperand ShiftAmtBytes =
2385 DAG.getNode(ISD::SRL, ShiftAmtVT,
2387 DAG.getConstant(3, ShiftAmtVT));
2388 SDOperand ShiftAmtBits =
2389 DAG.getNode(ISD::AND, ShiftAmtVT,
2391 DAG.getConstant(7, ShiftAmtVT));
2393 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2394 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2395 Op0, ShiftAmtBytes),
2400 // Promote Op0 to vector
2402 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2403 SDOperand ShiftAmt = Op.getOperand(1);
2404 MVT ShiftVT = ShiftAmt.getValueType();
2406 // Negate variable shift amounts
2407 if (!isa<ConstantSDNode>(ShiftAmt)) {
2408 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2409 DAG.getConstant(0, ShiftVT), ShiftAmt);
2412 SDOperand UpperHalfSign =
2413 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2414 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2415 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2416 Op0, DAG.getConstant(31, MVT::i32))));
2417 SDOperand UpperHalfSignMask =
2418 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2419 SDOperand UpperLowerMask =
2420 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2421 DAG.getConstant(0xff00, MVT::i16));
2422 SDOperand UpperLowerSelect =
2423 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2424 UpperHalfSignMask, Op0, UpperLowerMask);
2425 SDOperand RotateLeftBytes =
2426 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2427 UpperLowerSelect, ShiftAmt);
2428 SDOperand RotateLeftBits =
2429 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2430 RotateLeftBytes, ShiftAmt);
2432 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2440 //! Lower byte immediate operations for v16i8 vectors:
2442 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2445 MVT VT = Op.getValueType();
2447 ConstVec = Op.getOperand(0);
2448 Arg = Op.getOperand(1);
2449 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2450 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2451 ConstVec = ConstVec.getOperand(0);
2453 ConstVec = Op.getOperand(1);
2454 Arg = Op.getOperand(0);
2455 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2456 ConstVec = ConstVec.getOperand(0);
2461 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2462 uint64_t VectorBits[2];
2463 uint64_t UndefBits[2];
2464 uint64_t SplatBits, SplatUndef;
2467 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2468 && isConstantSplat(VectorBits, UndefBits,
2469 VT.getVectorElementType().getSizeInBits(),
2470 SplatBits, SplatUndef, SplatSize)) {
2471 SDOperand tcVec[16];
2472 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2473 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2475 // Turn the BUILD_VECTOR into a set of target constants:
2476 for (size_t i = 0; i < tcVecSize; ++i)
2479 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2480 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2487 //! Lower i32 multiplication
2488 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, MVT VT,
2490 switch (VT.getSimpleVT()) {
2492 cerr << "CellSPU: Unknown LowerMUL value type, got "
2493 << Op.getValueType().getMVTString()
2499 SDOperand rA = Op.getOperand(0);
2500 SDOperand rB = Op.getOperand(1);
2502 return DAG.getNode(ISD::ADD, MVT::i32,
2503 DAG.getNode(ISD::ADD, MVT::i32,
2504 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2505 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2506 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2513 //! Custom lowering for CTPOP (count population)
2515 Custom lowering code that counts the number ones in the input
2516 operand. SPU has such an instruction, but it counts the number of
2517 ones per byte, which then have to be accumulated.
2519 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2520 MVT VT = Op.getValueType();
2521 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2523 switch (VT.getSimpleVT()) {
2525 assert(false && "Invalid value type!");
2527 SDOperand N = Op.getOperand(0);
2528 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2530 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2531 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2533 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2537 MachineFunction &MF = DAG.getMachineFunction();
2538 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2540 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2542 SDOperand N = Op.getOperand(0);
2543 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2544 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2545 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2547 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2548 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2550 // CNTB_result becomes the chain to which all of the virtual registers
2551 // CNTB_reg, SUM1_reg become associated:
2552 SDOperand CNTB_result =
2553 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2555 SDOperand CNTB_rescopy =
2556 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2558 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2560 return DAG.getNode(ISD::AND, MVT::i16,
2561 DAG.getNode(ISD::ADD, MVT::i16,
2562 DAG.getNode(ISD::SRL, MVT::i16,
2569 MachineFunction &MF = DAG.getMachineFunction();
2570 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2572 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2573 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2575 SDOperand N = Op.getOperand(0);
2576 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2577 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2578 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2579 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2581 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2582 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2584 // CNTB_result becomes the chain to which all of the virtual registers
2585 // CNTB_reg, SUM1_reg become associated:
2586 SDOperand CNTB_result =
2587 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2589 SDOperand CNTB_rescopy =
2590 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2593 DAG.getNode(ISD::SRL, MVT::i32,
2594 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2597 DAG.getNode(ISD::ADD, MVT::i32,
2598 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2600 SDOperand Sum1_rescopy =
2601 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2604 DAG.getNode(ISD::SRL, MVT::i32,
2605 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2608 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2609 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2611 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2621 /// LowerOperation - Provide custom lowering hooks for some operations.
2624 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2626 unsigned Opc = (unsigned) Op.getOpcode();
2627 MVT VT = Op.getValueType();
2631 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2632 cerr << "Op.getOpcode() = " << Opc << "\n";
2633 cerr << "*Op.Val:\n";
2640 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2642 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2643 case ISD::ConstantPool:
2644 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2645 case ISD::GlobalAddress:
2646 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2647 case ISD::JumpTable:
2648 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2650 return LowerConstant(Op, DAG);
2651 case ISD::ConstantFP:
2652 return LowerConstantFP(Op, DAG);
2654 return LowerBRCOND(Op, DAG);
2655 case ISD::FORMAL_ARGUMENTS:
2656 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2658 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2660 return LowerRET(Op, DAG, getTargetMachine());
2663 // i8, i64 math ops:
2664 case ISD::ZERO_EXTEND:
2665 case ISD::SIGN_EXTEND:
2666 case ISD::ANY_EXTEND:
2675 return LowerI8Math(Op, DAG, Opc);
2676 else if (VT == MVT::i64)
2677 return LowerI64Math(Op, DAG, Opc);
2681 // Vector-related lowering.
2682 case ISD::BUILD_VECTOR:
2683 return LowerBUILD_VECTOR(Op, DAG);
2684 case ISD::SCALAR_TO_VECTOR:
2685 return LowerSCALAR_TO_VECTOR(Op, DAG);
2686 case ISD::VECTOR_SHUFFLE:
2687 return LowerVECTOR_SHUFFLE(Op, DAG);
2688 case ISD::EXTRACT_VECTOR_ELT:
2689 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2690 case ISD::INSERT_VECTOR_ELT:
2691 return LowerINSERT_VECTOR_ELT(Op, DAG);
2693 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2697 return LowerByteImmed(Op, DAG);
2699 // Vector and i8 multiply:
2702 return LowerVectorMUL(Op, DAG);
2703 else if (VT == MVT::i8)
2704 return LowerI8Math(Op, DAG, Opc);
2706 return LowerMUL(Op, DAG, VT, Opc);
2709 if (VT == MVT::f32 || VT == MVT::v4f32)
2710 return LowerFDIVf32(Op, DAG);
2711 // else if (Op.getValueType() == MVT::f64)
2712 // return LowerFDIVf64(Op, DAG);
2714 assert(0 && "Calling FDIV on unsupported MVT");
2717 return LowerCTPOP(Op, DAG);
2723 //===----------------------------------------------------------------------===//
2724 // Target Optimization Hooks
2725 //===----------------------------------------------------------------------===//
2728 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2731 TargetMachine &TM = getTargetMachine();
2733 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2734 SelectionDAG &DAG = DCI.DAG;
2735 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2736 SDOperand Result; // Initially, NULL result
2738 switch (N->getOpcode()) {
2741 SDOperand Op1 = N->getOperand(1);
2743 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2744 SDOperand Op01 = Op0.getOperand(1);
2745 if (Op01.getOpcode() == ISD::Constant
2746 || Op01.getOpcode() == ISD::TargetConstant) {
2747 // (add <const>, (SPUindirect <arg>, <const>)) ->
2748 // (SPUindirect <arg>, <const + const>)
2749 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2750 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2751 SDOperand combinedConst =
2752 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2753 Op0.getValueType());
2755 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2756 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2757 DEBUG(cerr << "With: (SPUindirect <arg>, "
2758 << CN0->getValue() + CN1->getValue() << ")\n");
2759 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2760 Op0.getOperand(0), combinedConst);
2762 } else if (isa<ConstantSDNode>(Op0)
2763 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2764 SDOperand Op11 = Op1.getOperand(1);
2765 if (Op11.getOpcode() == ISD::Constant
2766 || Op11.getOpcode() == ISD::TargetConstant) {
2767 // (add (SPUindirect <arg>, <const>), <const>) ->
2768 // (SPUindirect <arg>, <const + const>)
2769 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2770 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2771 SDOperand combinedConst =
2772 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2773 Op0.getValueType());
2775 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2776 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2777 DEBUG(cerr << "With: (SPUindirect <arg>, "
2778 << CN0->getValue() + CN1->getValue() << ")\n");
2780 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2781 Op1.getOperand(0), combinedConst);
2786 case ISD::SIGN_EXTEND:
2787 case ISD::ZERO_EXTEND:
2788 case ISD::ANY_EXTEND: {
2789 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2790 N->getValueType(0) == Op0.getValueType()) {
2791 // (any_extend (SPUextract_elt0 <arg>)) ->
2792 // (SPUextract_elt0 <arg>)
2793 // Types must match, however...
2794 DEBUG(cerr << "Replace: ");
2795 DEBUG(N->dump(&DAG));
2796 DEBUG(cerr << "\nWith: ");
2797 DEBUG(Op0.Val->dump(&DAG));
2798 DEBUG(cerr << "\n");
2804 case SPUISD::IndirectAddr: {
2805 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2806 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2807 if (CN->getValue() == 0) {
2808 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2809 // (SPUaform <addr>, 0)
2811 DEBUG(cerr << "Replace: ");
2812 DEBUG(N->dump(&DAG));
2813 DEBUG(cerr << "\nWith: ");
2814 DEBUG(Op0.Val->dump(&DAG));
2815 DEBUG(cerr << "\n");
2822 case SPUISD::SHLQUAD_L_BITS:
2823 case SPUISD::SHLQUAD_L_BYTES:
2824 case SPUISD::VEC_SHL:
2825 case SPUISD::VEC_SRL:
2826 case SPUISD::VEC_SRA:
2827 case SPUISD::ROTQUAD_RZ_BYTES:
2828 case SPUISD::ROTQUAD_RZ_BITS: {
2829 SDOperand Op1 = N->getOperand(1);
2831 if (isa<ConstantSDNode>(Op1)) {
2832 // Kill degenerate vector shifts:
2833 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2835 if (CN->getValue() == 0) {
2841 case SPUISD::PROMOTE_SCALAR: {
2842 switch (Op0.getOpcode()) {
2845 case ISD::ANY_EXTEND:
2846 case ISD::ZERO_EXTEND:
2847 case ISD::SIGN_EXTEND: {
2848 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2850 // but only if the SPUpromote_scalar and <arg> types match.
2851 SDOperand Op00 = Op0.getOperand(0);
2852 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2853 SDOperand Op000 = Op00.getOperand(0);
2854 if (Op000.getValueType() == N->getValueType(0)) {
2860 case SPUISD::EXTRACT_ELT0: {
2861 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2863 Result = Op0.getOperand(0);
2870 // Otherwise, return unchanged.
2873 DEBUG(cerr << "\nReplace.SPU: ");
2874 DEBUG(N->dump(&DAG));
2875 DEBUG(cerr << "\nWith: ");
2876 DEBUG(Result.Val->dump(&DAG));
2877 DEBUG(cerr << "\n");
2884 //===----------------------------------------------------------------------===//
2885 // Inline Assembly Support
2886 //===----------------------------------------------------------------------===//
2888 /// getConstraintType - Given a constraint letter, return the type of
2889 /// constraint it is for this target.
2890 SPUTargetLowering::ConstraintType
2891 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2892 if (ConstraintLetter.size() == 1) {
2893 switch (ConstraintLetter[0]) {
2900 return C_RegisterClass;
2903 return TargetLowering::getConstraintType(ConstraintLetter);
2906 std::pair<unsigned, const TargetRegisterClass*>
2907 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2910 if (Constraint.size() == 1) {
2911 // GCC RS6000 Constraint Letters
2912 switch (Constraint[0]) {
2916 return std::make_pair(0U, SPU::R64CRegisterClass);
2917 return std::make_pair(0U, SPU::R32CRegisterClass);
2920 return std::make_pair(0U, SPU::R32FPRegisterClass);
2921 else if (VT == MVT::f64)
2922 return std::make_pair(0U, SPU::R64FPRegisterClass);
2925 return std::make_pair(0U, SPU::GPRCRegisterClass);
2929 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2932 //! Compute used/known bits for a SPU operand
2934 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2938 const SelectionDAG &DAG,
2939 unsigned Depth ) const {
2941 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2944 switch (Op.getOpcode()) {
2946 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2956 case SPUISD::PROMOTE_SCALAR: {
2957 SDOperand Op0 = Op.getOperand(0);
2958 MVT Op0VT = Op0.getValueType();
2959 unsigned Op0VTBits = Op0VT.getSizeInBits();
2960 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2961 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2962 KnownOne |= APInt(Op0VTBits, InMask, false);
2966 case SPUISD::LDRESULT:
2967 case SPUISD::EXTRACT_ELT0:
2968 case SPUISD::EXTRACT_ELT0_CHAINED: {
2969 MVT OpVT = Op.getValueType();
2970 unsigned OpVTBits = OpVT.getSizeInBits();
2971 uint64_t InMask = OpVT.getIntegerVTBitMask();
2972 KnownZero |= APInt(OpVTBits, ~InMask, false);
2973 KnownOne |= APInt(OpVTBits, InMask, false);
2978 case EXTRACT_I1_ZEXT:
2979 case EXTRACT_I1_SEXT:
2980 case EXTRACT_I8_ZEXT:
2981 case EXTRACT_I8_SEXT:
2986 case SPUISD::SHLQUAD_L_BITS:
2987 case SPUISD::SHLQUAD_L_BYTES:
2988 case SPUISD::VEC_SHL:
2989 case SPUISD::VEC_SRL:
2990 case SPUISD::VEC_SRA:
2991 case SPUISD::VEC_ROTL:
2992 case SPUISD::VEC_ROTR:
2993 case SPUISD::ROTQUAD_RZ_BYTES:
2994 case SPUISD::ROTQUAD_RZ_BITS:
2995 case SPUISD::ROTBYTES_RIGHT_S:
2996 case SPUISD::ROTBYTES_LEFT:
2997 case SPUISD::ROTBYTES_LEFT_CHAINED:
2998 case SPUISD::SELECT_MASK:
3000 case SPUISD::FPInterp:
3001 case SPUISD::FPRecipEst:
3002 case SPUISD::SEXT32TO64:
3007 // LowerAsmOperandForConstraint
3009 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
3010 char ConstraintLetter,
3011 std::vector<SDOperand> &Ops,
3012 SelectionDAG &DAG) const {
3013 // Default, for the time being, to the base class handler
3014 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3017 /// isLegalAddressImmediate - Return true if the integer value can be used
3018 /// as the offset of the target addressing mode.
3019 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3020 // SPU's addresses are 256K:
3021 return (V > -(1 << 18) && V < (1 << 18) - 1);
3024 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {