1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
// NOTE(review): file-scope map, lazily populated on the first call to
// getTargetNodeName(); that initialization is not guarded, so it is not
// thread-safe (acceptable in single-threaded codegen).
38 std::map<unsigned, const char *> node_names;
40 //! MVT::ValueType mapping to useful data for Cell SPU
41 struct valtype_map_s {
42 const MVT::ValueType valtype;
43 const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Look up the valtype_map entry for VT by a linear scan over the small
// table above; returns 0 (NULL) when VT has no entry. The diagnostic below
// fires on a failed lookup.
59 const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
// NOTE(review): the loop-exit/break lines and the NDEBUG guard that
// presumably surrounds this diagnostic are elided in this listing —
// confirm against the full source.
71 cerr << "getValueTypeMapEntry returns NULL for "
72 << MVT::getValueTypeString(VT)
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDOperand &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDOperand &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
// SPUTargetLowering constructor: declares the SPU register classes and
// configures, per value type, which DAG operations are Legal, Custom-lowered,
// Expanded or Promoted for the Cell SPU.
// NOTE(review): this listing elides the remainder of the initializer list
// (original lines 115-116) and several lines throughout the body (gaps in the
// embedded line numbers) — confirm against the full source before editing.
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 setOperationAction(ISD::LOAD, sctype, Custom);
165 setOperationAction(ISD::STORE, sctype, Custom);
168 // Custom lower BRCOND for i1, i8 to "promote" the result to
169 // i32 and i16, respectively.
170 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
172 // Expand the jumptable branches
173 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
174 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
175 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
177 // SPU has no intrinsics for these particular operations:
178 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
179 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
180 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
181 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
183 // SPU has no SREM/UREM instructions
184 setOperationAction(ISD::SREM, MVT::i32, Expand);
185 setOperationAction(ISD::UREM, MVT::i32, Expand);
186 setOperationAction(ISD::SREM, MVT::i64, Expand);
187 setOperationAction(ISD::UREM, MVT::i64, Expand);
189 // We don't support sin/cos/sqrt/fmod
190 setOperationAction(ISD::FSIN , MVT::f64, Expand);
191 setOperationAction(ISD::FCOS , MVT::f64, Expand);
192 setOperationAction(ISD::FREM , MVT::f64, Expand);
193 setOperationAction(ISD::FSIN , MVT::f32, Expand);
194 setOperationAction(ISD::FCOS , MVT::f32, Expand);
195 setOperationAction(ISD::FREM , MVT::f32, Expand);
197 // If we're enabling GP optimizations, use hardware square root
198 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
199 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
202 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
204 // SPU can do rotate right and left, so legalize it... but customize for i8
205 // because instructions don't exist.
206 setOperationAction(ISD::ROTR, MVT::i32, Legal);
207 setOperationAction(ISD::ROTR, MVT::i16, Legal);
208 setOperationAction(ISD::ROTR, MVT::i8, Custom);
209 setOperationAction(ISD::ROTL, MVT::i32, Legal);
210 setOperationAction(ISD::ROTL, MVT::i16, Legal);
211 setOperationAction(ISD::ROTL, MVT::i8, Custom);
212 // SPU has no native version of shift left/right for i8
213 setOperationAction(ISD::SHL, MVT::i8, Custom);
214 setOperationAction(ISD::SRL, MVT::i8, Custom);
215 setOperationAction(ISD::SRA, MVT::i8, Custom);
216 // And SPU needs custom lowering for shift left/right for i64
217 setOperationAction(ISD::SHL, MVT::i64, Custom);
218 setOperationAction(ISD::SRL, MVT::i64, Custom);
219 setOperationAction(ISD::SRA, MVT::i64, Custom);
221 // Custom lower i32 multiplications
222 setOperationAction(ISD::MUL, MVT::i32, Custom);
224 // Need to custom handle (some) common i8 math ops
225 setOperationAction(ISD::SUB, MVT::i8, Custom);
226 setOperationAction(ISD::MUL, MVT::i8, Custom);
228 // SPU does not have BSWAP. It does have i32 support CTLZ.
229 // CTPOP has to be custom lowered.
230 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
231 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
233 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
234 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
235 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
236 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
238 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
239 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
241 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
243 // SPU has a version of select that implements (a&~c)|(b|c), just like
244 // select ought to work:
245 setOperationAction(ISD::SELECT, MVT::i1, Promote);
246 setOperationAction(ISD::SELECT, MVT::i8, Legal);
247 setOperationAction(ISD::SELECT, MVT::i16, Legal);
248 setOperationAction(ISD::SELECT, MVT::i32, Legal);
249 setOperationAction(ISD::SELECT, MVT::i64, Expand);
251 setOperationAction(ISD::SETCC, MVT::i1, Promote);
252 setOperationAction(ISD::SETCC, MVT::i8, Legal);
253 setOperationAction(ISD::SETCC, MVT::i16, Legal);
254 setOperationAction(ISD::SETCC, MVT::i32, Legal);
255 setOperationAction(ISD::SETCC, MVT::i64, Expand);
257 // Zero extension and sign extension for i64 have to be
259 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
260 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
261 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
263 // SPU has a legal FP -> signed INT instruction
264 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
265 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
266 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
267 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
269 // FDIV on SPU requires custom lowering
270 setOperationAction(ISD::FDIV, MVT::f32, Custom);
271 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
273 // SPU has [U|S]INT_TO_FP
274 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
275 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
276 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
277 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
278 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
279 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
280 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
281 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
283 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
284 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
285 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
286 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
288 // We cannot sextinreg(i1). Expand to shifts.
289 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
291 // Support label based line numbers.
292 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
293 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
295 // We want to legalize GlobalAddress and ConstantPool nodes into the
296 // appropriate instructions to materialize the address.
297 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
299 setOperationAction(ISD::GlobalAddress, sctype, Custom);
300 setOperationAction(ISD::ConstantPool, sctype, Custom);
301 setOperationAction(ISD::JumpTable, sctype, Custom);
304 // RET must be custom lowered, to meet ABI requirements
305 setOperationAction(ISD::RET, MVT::Other, Custom);
307 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
308 setOperationAction(ISD::VASTART , MVT::Other, Custom);
310 // Use the default implementation.
311 setOperationAction(ISD::VAARG , MVT::Other, Expand);
312 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
313 setOperationAction(ISD::VAEND , MVT::Other, Expand);
314 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
315 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
316 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
317 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
// NOTE(review): the next two calls duplicate the Custom settings already made
// above (original lines 265 and 280); redundant but harmless.
319 // Cell SPU has instructions for converting between i64 and fp.
320 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
321 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
// NOTE(review): this Promote overrides the earlier FP_TO_UINT i32 Legal
// setting (original line 266) — last call wins. Confirm which was intended.
323 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
324 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
326 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
327 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
329 // First set operation action for all vector types to expand. Then we
330 // will selectively turn on ones that can be effectively codegen'd.
331 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
332 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
333 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
334 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
335 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
336 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
338 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
339 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
340 // add/sub are legal for all supported vector VT's.
341 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
342 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
343 // mul has to be custom lowered.
344 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);
346 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
347 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
348 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
349 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
350 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
351 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);
353 // These operations need to be expanded:
354 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
355 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
356 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
357 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
358 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);
360 // Custom lower build_vector, constant pool spills, insert and
361 // extract vector elements:
362 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
363 setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
364 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
365 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
366 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
367 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
// v16i8 logical/multiply ops need custom handling (override the per-VT
// defaults set in the loop above).
370 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
371 setOperationAction(ISD::AND, MVT::v16i8, Custom);
372 setOperationAction(ISD::OR, MVT::v16i8, Custom);
373 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
374 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
376 setShiftAmountType(MVT::i32);
377 setSetCCResultContents(ZeroOrOneSetCCResult);
379 setStackPointerRegisterToSaveRestore(SPU::R1);
381 // We have target-specific dag combine patterns for the following nodes:
382 setTargetDAGCombine(ISD::ADD);
383 setTargetDAGCombine(ISD::ZERO_EXTEND);
384 setTargetDAGCombine(ISD::SIGN_EXTEND);
385 setTargetDAGCombine(ISD::ANY_EXTEND);
387 computeRegisterProperties();
// Return a human-readable mnemonic for an SPU-specific DAG opcode, or 0 when
// the opcode is unknown. The name table is built lazily on first use (into
// the file-scope node_names map above) — not thread-safe.
391 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
393 if (node_names.empty()) {
394 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
395 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
396 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
397 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
398 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
399 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
400 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
401 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
402 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
403 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
404 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
405 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
406 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
407 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
408 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
409 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
410 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
411 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
412 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
413 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
414 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
415 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
416 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
417 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
418 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
419 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
420 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
421 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
422 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
423 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
424 "SPUISD::ROTQUAD_RZ_BYTES";
425 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
426 "SPUISD::ROTQUAD_RZ_BITS";
427 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
428 "SPUISD::ROTBYTES_RIGHT_S";
429 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
430 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
431 "SPUISD::ROTBYTES_LEFT_CHAINED";
432 node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
433 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
434 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
435 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
436 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
// Look the opcode up; unknown opcodes yield a null pointer.
439 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
441 return ((i != node_names.end()) ? i->second : 0);
// Return the value type produced by ISD::SETCC for the given operand.
// NOTE(review): the return statements (original lines 448-451) are elided in
// this listing — presumably VT is returned for integer operands and a default
// integer type otherwise; confirm against the full source.
445 SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
446 MVT::ValueType VT = Op.getValueType();
447 if (MVT::isInteger(VT))
453 //===----------------------------------------------------------------------===//
454 // Calling convention code:
455 //===----------------------------------------------------------------------===//
457 #include "SPUGenCallingConv.inc"
459 //===----------------------------------------------------------------------===//
460 // LowerOperation implementation
461 //===----------------------------------------------------------------------===//
463 /// Aligned load common code for CellSPU
//! \param[in] Op The SelectionDAG load or store operand
//! \param[in] DAG The selection DAG
//! \param[in] ST CellSPU subtarget information structure
//! \param[in,out] alignment Caller initializes this to the load or store node's
//! value from getAlignment(), may be updated while generating the aligned load
//! \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
//! offset (divisible by 16, modulo 16 == 0)
//! \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
//! offset of the preferred slot (modulo 16 != 0)
//! \param[in,out] VT Caller initializes this value type to the load or store
//! node's loaded or stored value type; may be updated if an i1-extended load or
//! store is encountered
//! \param[out] was16aligned true if the base pointer had 16-byte alignment,
//! otherwise false. Can help to determine if the chunk needs to be rotated.
//!
//! Both load and store lowering load a block of data aligned on a 16-byte
//! boundary. This is the common aligned load code shared between both.
// Emit a v16i8 load of the 16-byte chunk containing the addressed datum and
// compute the offsets needed to rotate/extract the requested element.
// NOTE(review): this listing elides several lines (e.g. the LSN parameter's
// declaration at original line 485 and various closing braces) — confirm
// against the full source before editing.
484 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
486 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
487 MVT::ValueType &VT, bool &was16aligned)
489 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
490 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
491 SDOperand basePtr = LSN->getBasePtr();
492 SDOperand chain = LSN->getChain();
// Fold a (base + constant) address: split off the constant as the aligned
// offset, and derive the preferred-slot rotation from its low 4 bits.
494 if (basePtr.getOpcode() == ISD::ADD) {
495 SDOperand Op1 = basePtr.Val->getOperand(1);
497 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
498 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
500 alignOffs = (int) CN->getValue();
501 prefSlotOffs = (int) (alignOffs & 0xf);
503 // Adjust the rotation amount to ensure that the final result ends up in
504 // the preferred slot:
505 prefSlotOffs -= vtm->prefslot_byte;
506 basePtr = basePtr.getOperand(0);
508 // Loading from memory, can we adjust alignment?
509 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
510 SDOperand APtr = basePtr.getOperand(0);
511 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
512 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
513 alignment = GSDN->getGlobal()->getAlignment();
// No constant offset available: rotation is just the preferred-slot byte.
518 prefSlotOffs = -vtm->prefslot_byte;
522 prefSlotOffs = -vtm->prefslot_byte;
// 16-byte aligned case: rebase as a D-form address and do one aligned
// v16i8 load of the whole chunk.
525 if (alignment == 16) {
526 // Realign the base pointer as a D-Form address:
527 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
528 basePtr = DAG.getNode(ISD::ADD, PtrVT,
530 DAG.getConstant((alignOffs & ~0xf), PtrVT));
533 // Emit the vector load:
535 return DAG.getLoad(MVT::v16i8, chain, basePtr,
536 LSN->getSrcValue(), LSN->getSrcValueOffset(),
537 LSN->isVolatile(), 16);
540 // Unaligned load or we're using the "large memory" model, which means that
541 // we have to be very pessimistic:
542 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
543 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
547 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
548 DAG.getConstant((alignOffs & ~0xf), PtrVT));
549 was16aligned = false;
550 return DAG.getLoad(MVT::v16i8, chain, basePtr,
551 LSN->getSrcValue(), LSN->getSrcValueOffset(),
552 LSN->isVolatile(), 16);
555 /// Custom lower loads for CellSPU
//! All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
//! within a 16-byte block, we have to rotate to extract the requested element.
// Custom lowering for LOAD: load the containing 16-byte chunk via
// AlignedLoad, rotate the wanted element into the preferred slot, extract it
// as a scalar, apply any sign/zero extension, and wrap the result in
// SPUISD::LDRESULT to carry the chain.
// NOTE(review): this listing elides a number of structural lines (braces,
// some declarations) — confirm against the full source before editing.
561 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
562 LoadSDNode *LN = cast<LoadSDNode>(Op);
563 SDOperand the_chain = LN->getChain();
564 MVT::ValueType VT = LN->getMemoryVT();
565 MVT::ValueType OpVT = Op.Val->getValueType(0);
566 ISD::LoadExtType ExtType = LN->getExtensionType();
567 unsigned alignment = LN->getAlignment();
570 switch (LN->getAddressingMode()) {
571 case ISD::UNINDEXED: {
575 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
580 the_chain = result.getValue(1);
581 // Rotate the chunk if necessary
584 if (rotamt != 0 || !was16aligned) {
585 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// Rotation amount: an i16 immediate when the chunk was 16-byte aligned,
// otherwise derived from the (unaligned) base pointer itself.
590 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
592 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
593 LoadSDNode *LN1 = cast<LoadSDNode>(result);
594 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
595 DAG.getConstant(rotamt, PtrVT));
598 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
599 the_chain = result.getValue(1);
// Convert the rotated v16i8 chunk into a vector of the requested element
// type, then pull element 0 out as the scalar result.
602 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
604 MVT::ValueType vecVT = MVT::v16i8;
606 // Convert the loaded v16i8 vector to the appropriate vector type
607 // specified by the operand:
610 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
612 vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
615 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
616 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
617 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
618 the_chain = result.getValue(1);
620 // Handle the sign and zero-extending loads for i1 and i8:
623 if (ExtType == ISD::SEXTLOAD) {
624 NewOpC = (OpVT == MVT::i1
625 ? SPUISD::EXTRACT_I1_SEXT
626 : SPUISD::EXTRACT_I8_SEXT);
628 assert(ExtType == ISD::ZEXTLOAD);
629 NewOpC = (OpVT == MVT::i1
630 ? SPUISD::EXTRACT_I1_ZEXT
631 : SPUISD::EXTRACT_I8_ZEXT);
634 result = DAG.getNode(NewOpC, OpVT, result);
// Package the scalar value and the chain into an LDRESULT node so callers
// see a two-result (value, chain) load replacement.
637 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
638 SDOperand retops[2] = {
643 result = DAG.getNode(SPUISD::LDRESULT, retvts,
644 retops, sizeof(retops) / sizeof(retops[0]));
// Indexed addressing modes are not supported; report and fall through to
// failure (remaining lines elided in this listing).
651 case ISD::LAST_INDEXED_MODE:
652 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
654 cerr << (unsigned) LN->getAddressingMode() << "\n";
662 /// Custom lower stores for CellSPU
//! All CellSPU stores are aligned to 16-byte boundaries, so for elements
//! within a 16-byte block, we have to generate a shuffle to insert the
//! requested element into its place, then store the resulting block.
// Custom lowering for STORE: load the containing 16-byte chunk (via
// AlignedLoad), shuffle the value to be stored into its slot using an
// INSERT_MASK + SHUFB, then store the whole chunk back.
// NOTE(review): this listing elides a number of structural lines — confirm
// against the full source before editing.
669 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
670 StoreSDNode *SN = cast<StoreSDNode>(Op);
671 SDOperand Value = SN->getValue();
672 MVT::ValueType VT = Value.getValueType();
673 MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
674 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
675 unsigned alignment = SN->getAlignment();
677 switch (SN->getAddressingMode()) {
678 case ISD::UNINDEXED: {
679 int chunk_offset, slot_offset;
682 // The vector type we really want to load from the 16-byte chunk, except
683 // in the case of MVT::i1, which has to be v16i8.
684 unsigned vecVT, stVecVT = MVT::v16i8;
687 stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
688 vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
690 SDOperand alignLoadVec =
691 AlignedLoad(Op, DAG, ST, SN, alignment,
692 chunk_offset, slot_offset, VT, was16aligned);
// A null result means AlignedLoad could not handle the address.
694 if (alignLoadVec.Val == 0)
697 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
698 SDOperand basePtr = LN->getBasePtr();
699 SDOperand the_chain = alignLoadVec.getValue(1);
700 SDOperand theValue = SN->getValue();
704 && (theValue.getOpcode() == ISD::AssertZext
705 || theValue.getOpcode() == ISD::AssertSext)) {
706 // Drill down and get the value for zero- and sign-extended
708 theValue = theValue.getOperand(0);
713 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
714 SDOperand insertEltPtr;
715 SDOperand insertEltOp;
717 // If the base pointer is already a D-form address, then just create
718 // a new D-form address with a slot offset and the orignal base pointer.
719 // Otherwise generate a D-form address with the slot offset relative
720 // to the stack pointer, which is always aligned.
721 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
722 DEBUG(basePtr.Val->dump(&DAG));
725 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
726 (basePtr.getOpcode() == ISD::ADD
727 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
728 insertEltPtr = basePtr;
730 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
// Build the insertion mask for the target slot and shuffle the scalar into
// the loaded chunk, then store the merged chunk back to memory.
733 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
734 result = DAG.getNode(SPUISD::SHUFB, vecVT,
735 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
737 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
739 result = DAG.getStore(the_chain, result, basePtr,
740 LN->getSrcValue(), LN->getSrcValueOffset(),
741 LN->isVolatile(), LN->getAlignment());
// NOTE(review): this diagnostic says "LowerLOAD"/"LoadSDNode" but we are in
// LowerSTORE — looks like a copy-paste from LowerLOAD; the message text
// should refer to LowerSTORE/StoreSDNode (string deliberately left unchanged
// here).
750 case ISD::LAST_INDEXED_MODE:
751 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
753 cerr << (unsigned) SN->getAddressingMode() << "\n";
761 /// Generate the address of a constant pool entry.
// Lower a ConstantPool node to an addressable form: an A-form address for the
// small memory model, or a Hi/Lo pair combined through IndirectAddr for the
// large memory model. Only the static relocation model is supported.
763 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
764 MVT::ValueType PtrVT = Op.getValueType();
765 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
766 Constant *C = CP->getConstVal();
767 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
768 SDOperand Zero = DAG.getConstant(0, PtrVT);
769 const TargetMachine &TM = DAG.getTarget();
771 if (TM.getRelocationModel() == Reloc::Static) {
772 if (!ST->usingLargeMem()) {
773 // Just return the SDOperand with the constant pool address in it.
774 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
776 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
777 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
778 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
// NOTE(review): the line below is the tail of an assert whose first line
// (original ~782) is elided in this listing.
783 "LowerConstantPool: Relocation model other than static not supported.");
// Lower a JumpTable node exactly like LowerConstantPool above: A-form address
// for small memory model, Hi/Lo + IndirectAddr for large memory model; only
// the static relocation model is supported.
788 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
789 MVT::ValueType PtrVT = Op.getValueType();
790 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
791 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
792 SDOperand Zero = DAG.getConstant(0, PtrVT);
793 const TargetMachine &TM = DAG.getTarget();
795 if (TM.getRelocationModel() == Reloc::Static) {
796 if (!ST->usingLargeMem()) {
797 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
799 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
800 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
801 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
// NOTE(review): the line below is the tail of an assert whose first line is
// elided in this listing.
806 "LowerJumpTable: Relocation model other than static not supported.");
// Lower a GlobalAddress node following the same scheme as the constant-pool
// and jump-table lowerings above; non-static relocation models are reported
// as an error (continuation lines elided in this listing).
811 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
812 MVT::ValueType PtrVT = Op.getValueType();
813 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
814 GlobalValue *GV = GSDN->getGlobal();
815 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
816 const TargetMachine &TM = DAG.getTarget();
817 SDOperand Zero = DAG.getConstant(0, PtrVT);
819 if (TM.getRelocationModel() == Reloc::Static) {
820 if (!ST->usingLargeMem()) {
821 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
823 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
824 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
825 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
828 cerr << "LowerGlobalAddress: Relocation model other than static not "
837 //! Custom lower i64 integer constants
//! This code inserts all of the necessary juggling that needs to occur to load
//! a 64-bit constant into a register.
// Lower an i64 ISD::Constant by splatting it into a v2i64 BUILD_VECTOR and
// extracting element 0 — the SPU materializes 64-bit immediates through the
// vector unit. Any other constant type falls into the error path below
// (continuation/abort lines elided in this listing).
843 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
844 unsigned VT = Op.getValueType();
845 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
847 if (VT == MVT::i64) {
848 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
849 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
850 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
852 cerr << "LowerConstant: unhandled constant type "
853 << MVT::getValueTypeString(VT)
862 //! Custom lower double precision floating point constants
// Lower an f64 ConstantFP by materializing its bit pattern as an i64 constant
// (via LowerConstant above) and bit-converting back to f64.
864 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
865 unsigned VT = Op.getValueType();
866 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
// NOTE(review): the line below is the tail of an assert whose first line is
// elided in this listing.
869 "LowerConstantFP: Node is not ConstantFPSDNode");
871 if (VT == MVT::f64) {
872 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
873 return DAG.getNode(ISD::BIT_CONVERT, VT,
874 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
880 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
// Custom-lower BRCOND whose condition is i1 or i8: zero-extend the condition
// to i32 or i16 respectively (types the SPU branches on natively) and rebuild
// the BRCOND; other condition types are returned unchanged.
882 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
884 SDOperand Cond = Op.getOperand(1);
885 MVT::ValueType CondVT = Cond.getValueType();
886 MVT::ValueType CondNVT;
888 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
889 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
890 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
892 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
895 return SDOperand(); // Unchanged
// Lowers ISD::FORMAL_ARGUMENTS for the SPU: for each incoming argument,
// either copies it out of the next SPU argument register (per-type register
// class) or, when registers run out / the function is vararg, loads it from
// a fixed stack slot. Also spills any remaining argument registers to the
// stack for varargs so va_arg can walk them. Returns a MERGE_VALUES node of
// all argument values plus the updated chain (Root) as the last result.
// NOTE(review): this listing elides many lines (e.g. 900, 907, 919, 923-925,
// 928-931, the switch's case labels between the per-type blocks, 986-996,
// 1001-1007, 1010, 1015-1016, 1018-1019, 1022, 1024, 1039, 1042-1043, 1045)
// — the 'static SDOperand' line, the switch on ObjectVT, ArgVal's
// declaration, the vararg guard around the spill loop, and closing braces
// are not visible here; confirm against the full file.
899 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
901 MachineFunction &MF = DAG.getMachineFunction();
902 MachineFrameInfo *MFI = MF.getFrameInfo();
903 MachineRegisterInfo &RegInfo = MF.getRegInfo();
904 SmallVector<SDOperand, 8> ArgValues;
905 SDOperand Root = Op.getOperand(0);
// Operand 2 of FORMAL_ARGUMENTS is the vararg flag (constant 0/1).
906 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
908 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
909 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// First stack argument lives just past the minimal linkage area.
911 unsigned ArgOffset = SPUFrameInfo::minStackSize();
912 unsigned ArgRegIdx = 0;
913 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
915 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
917 // Add DAG nodes to load the arguments or copy them out of registers.
// Last value of the FORMAL_ARGUMENTS node is the chain, hence the -1.
918 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
920 bool needsLoad = false;
921 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
922 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
926 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
927 << MVT::getValueTypeString(ObjectVT)
// Per-type blocks below: copy from the next arg register into a fresh
// virtual register of the matching register class. The case labels and
// the else-branches that set needsLoad are elided in this listing.
932 if (!isVarArg && ArgRegIdx < NumArgRegs) {
933 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
934 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
935 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
942 if (!isVarArg && ArgRegIdx < NumArgRegs) {
943 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
944 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
945 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
952 if (!isVarArg && ArgRegIdx < NumArgRegs) {
953 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
954 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
955 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
962 if (!isVarArg && ArgRegIdx < NumArgRegs) {
963 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
964 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
965 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
972 if (!isVarArg && ArgRegIdx < NumArgRegs) {
973 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
974 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
975 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
982 if (!isVarArg && ArgRegIdx < NumArgRegs) {
983 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
984 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
985 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments all go through the single vector register class.
997 if (!isVarArg && ArgRegIdx < NumArgRegs) {
998 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
999 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1000 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1008 // We need to load the argument to a virtual register if we determined above
1009 // that we ran out of physical registers of the appropriate type
1011 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1012 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1013 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1014 ArgOffset += StackSlotSize;
1017 ArgValues.push_back(ArgVal);
1020 // If the function takes variable number of arguments, make a frame index for
1021 // the start of the first vararg value... for expansion of llvm.va_start.
1023 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1025 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1026 // If this function is vararg, store any remaining integer argument regs to
1027 // their spots on the stack so that they may be loaded by deferencing the
1028 // result of va_next.
1029 SmallVector<SDOperand, 8> MemOps;
1030 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1031 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1032 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1033 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1034 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1035 MemOps.push_back(Store);
1036 // Increment the address by four for the next argument to store
1037 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1038 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Chain all the vararg spill stores into the root token.
1040 if (!MemOps.empty())
1041 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
// Chain is always the last result of FORMAL_ARGUMENTS.
1044 ArgValues.push_back(Root)
1046 // Return the new list of results.
1047 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1048 Op.Val->value_end());
1049 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1052 /// isLSAAddress - Return the immediate to use if the specified
1053 /// value is representable as a LSA address.
// Returns the word address (Addr >> 2) as an i32 constant node when Op is a
// constant that is 4-byte aligned and whose value sign-extends from 18 bits;
// returns 0 (null) otherwise.
1054 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1055 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
// NOTE(review): lines 1056-1057 are elided in this listing — presumably the
// null check on C with an early return; confirm against the full file.
1058 int Addr = C->getValue();
1059 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
// (Addr << 14 >> 14) round-trips iff Addr fits in a signed 18-bit field.
1060 (Addr << 14 >> 14) != Addr)
1061 return 0; // Top 14 bits have to be sext of immediate.
1063 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lowers ISD::CALL for the SPU: assigns the first NumArgRegs arguments to
// argument registers and spills the rest to stack slots, rewrites the callee
// into the appropriate SPU addressing form (PC-relative BRSL for defined
// globals, A-form for declarations, X-form indirect in large-memory mode),
// emits the call node bracketed by CALLSEQ_START/END, and copies any return
// values out of R3 (and R4 for a two-part i32 result).
// NOTE(review): this listing elides many lines — among them the function's
// return-type line, the case labels of both switches, InFlag's declaration,
// the CALLSEQ_END flag operand (line 1225), NumResults increments, the
// void-return 'return Chain' (1284-1285), and assorted closing braces.
// Confirm any behavioral reading against the full file.
1068 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1069 SDOperand Chain = Op.getOperand(0);
// CALL node layout: op1 = CC, op2 = vararg flag, op3 = tail-call flag,
// op4 = callee, then (arg, signness-flag) pairs.
1071 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1072 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1074 SDOperand Callee = Op.getOperand(4);
1075 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1076 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1077 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1078 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1080 // Handy pointer type
1081 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1083 // Accumulate how many bytes are to be pushed on the stack, including the
1084 // linkage area, and parameter passing area. According to the SPU ABI,
1085 // we minimally need space for [LR] and [SP]
1086 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1088 // Set up a copy of the stack pointer for use loading and storing any
1089 // arguments that may not fit in the registers available for argument
1091 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1093 // Figure out which arguments are going to go in registers, and which in
1095 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1096 unsigned ArgRegIdx = 0;
1098 // Keep track of registers passing arguments
1099 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1100 // And the arguments passed on the stack
1101 SmallVector<SDOperand, 8> MemOpChains;
1103 for (unsigned i = 0; i != NumOps; ++i) {
1104 SDOperand Arg = Op.getOperand(5+2*i);
1106 // PtrOff will be used to store the current argument to the stack if a
1107 // register cannot be found for it.
1108 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1109 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1111 switch (Arg.getValueType()) {
1112 default: assert(0 && "Unexpected ValueType for argument!");
// Three structurally identical groups follow (their case labels are
// elided in this listing): register if available, else stack slot.
1116 if (ArgRegIdx != NumArgRegs) {
1117 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1119 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1120 ArgOffset += StackSlotSize;
1125 if (ArgRegIdx != NumArgRegs) {
1126 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1128 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1129 ArgOffset += StackSlotSize;
1136 if (ArgRegIdx != NumArgRegs) {
1137 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1139 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1140 ArgOffset += StackSlotSize;
1146 // Update number of stack bytes actually used, insert a call sequence start
1147 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1148 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1150 if (!MemOpChains.empty()) {
1151 // Adjust the stack pointer for the stack arguments.
1152 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1153 &MemOpChains[0], MemOpChains.size());
1156 // Build a sequence of copy-to-reg nodes chained together with token chain
1157 // and flag operands which copy the outgoing args into the appropriate regs.
1159 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1160 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1162 InFlag = Chain.getValue(1);
1165 std::vector<MVT::ValueType> NodeTys;
1166 NodeTys.push_back(MVT::Other); // Returns a chain
1167 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1169 SmallVector<SDOperand, 8> Ops;
1170 unsigned CallOpc = SPUISD::CALL;
1172 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1173 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1174 // node so that legalize doesn't hack it.
1175 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1176 GlobalValue *GV = G->getGlobal();
1177 unsigned CalleeVT = Callee.getValueType();
1178 SDOperand Zero = DAG.getConstant(0, PtrVT);
1179 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1181 if (!ST->usingLargeMem()) {
1182 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1183 // style calls, otherwise, external symbols are BRASL calls. This assumes
1184 // that declared/defined symbols are in the same compilation unit and can
1185 // be reached through PC-relative jumps.
1188 // This may be an unsafe assumption for JIT and really large compilation
1190 if (GV->isDeclaration()) {
1191 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1193 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1196 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1198 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1200 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1201 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1202 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1203 // If this is an absolute destination address that appears to be a legal
1204 // local store address, use the munged value.
1205 Callee = SDOperand(Dest, 0);
1208 Ops.push_back(Chain);
1209 Ops.push_back(Callee);
1211 // Add argument registers to the end of the list so that they are known live
1213 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1214 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1215 RegsToPass[i].second.getValueType()));
1218 Ops.push_back(InFlag);
1219 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1220 InFlag = Chain.getValue(1);
1222 Chain = DAG.getCALLSEQ_END(Chain,
1223 DAG.getConstant(NumStackBytes, PtrVT),
1224 DAG.getConstant(0, PtrVT),
1226 if (Op.Val->getValueType(0) != MVT::Other)
1227 InFlag = Chain.getValue(1);
1229 SDOperand ResultVals[3];
1230 unsigned NumResults = 0;
1233 // If the call has results, copy the values out of the ret val registers.
1234 switch (Op.Val->getValueType(0)) {
1235 default: assert(0 && "Unexpected ret value!");
1236 case MVT::Other: break;
// i32 case: a second i32 result means the pair comes back in R4:R3.
1238 if (Op.Val->getValueType(1) == MVT::i32) {
1239 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1240 ResultVals[0] = Chain.getValue(0);
1241 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1242 Chain.getValue(2)).getValue(1);
1243 ResultVals[1] = Chain.getValue(0);
1245 NodeTys.push_back(MVT::i32);
1247 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1248 ResultVals[0] = Chain.getValue(0);
1251 NodeTys.push_back(MVT::i32);
1254 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1255 ResultVals[0] = Chain.getValue(0);
1257 NodeTys.push_back(MVT::i64);
// Remaining scalar/vector results come back in R3 with their own type.
1261 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1262 InFlag).getValue(1);
1263 ResultVals[0] = Chain.getValue(0);
1265 NodeTys.push_back(Op.Val->getValueType(0));
1272 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1273 InFlag).getValue(1);
1274 ResultVals[0] = Chain.getValue(0);
1276 NodeTys.push_back(Op.Val->getValueType(0));
1280 NodeTys.push_back(MVT::Other);
1282 // If the function returns void, just return the chain.
1283 if (NumResults == 0)
1286 // Otherwise, merge everything together with a MERGE_VALUES node.
1287 ResultVals[NumResults++] = Chain;
1288 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1289 ResultVals, NumResults);
1290 return Res.getValue(Op.ResNo);
// Lowers ISD::RET: runs the SPU return-value calling convention
// (RetCC_SPU) over the return operands, marks the assigned registers as
// function live-outs (once), copies each return value into its assigned
// register glued together with Flag, and emits SPUISD::RET_FLAG.
// NOTE(review): this listing elides lines (the return-type line before
// 1294, Flag's declaration near 1309-1310, and the Flag.Val null test that
// selects between the two RET_FLAG forms at 1317-1319); confirm against the
// full file.
1294 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1295 SmallVector<CCValAssign, 16> RVLocs;
1296 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1297 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1298 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1299 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1301 // If this is the first return lowered for this function, add the regs to the
1302 // liveout set for the function.
1303 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1304 for (unsigned i = 0; i != RVLocs.size(); ++i)
1305 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1308 SDOperand Chain = Op.getOperand(0);
1311 // Copy the result values into the output registers.
1312 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1313 CCValAssign &VA = RVLocs[i];
1314 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, value, signness) pairs, hence i*2+1.
1315 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1316 Flag = Chain.getValue(1);
1320 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1322 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1326 //===----------------------------------------------------------------------===//
1327 // Vector related lowering:
1328 //===----------------------------------------------------------------------===//
// Returns the ConstantSDNode if build-vector N is a splat — all non-undef
// elements equal and constant — otherwise 0. An all-undef vector also
// returns 0 (caller should use an implicit def).
// NOTE(review): lines 1333, 1337, 1340-1342, 1345-1348, 1350-1351 are
// elided in this listing (the first-assignment guard on OpVal, the
// mismatch early-return, and the 'return CN' path); confirm against the
// full file.
1330 static ConstantSDNode *
1331 getVecImm(SDNode *N) {
1332 SDOperand OpVal(0, 0);
1334 // Check to see if this buildvec has a single non-undef value in its elements.
1335 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1336 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1338 OpVal = N->getOperand(i);
1339 else if (OpVal != N->getOperand(i))
1343 if (OpVal.Val != 0) {
1344 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1349 return 0; // All UNDEF: use implicit def.; not Constant node
1352 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1353 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// For MVT::i64 the splat's upper and lower 32-bit halves are compared
// (the comparison/early-return lines 1363-1364 and 1366 are elided in this
// listing — presumably halves must match before Value is reduced to one
// half; confirm against the full file). Returns an empty SDOperand when
// the vector is not a suitable splat.
1355 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1356 MVT::ValueType ValueType) {
1357 if (ConstantSDNode *CN = getVecImm(N)) {
1358 uint64_t Value = CN->getValue();
1359 if (ValueType == MVT::i64) {
1360 uint64_t UValue = CN->getValue();
1361 uint32_t upper = uint32_t(UValue >> 32);
1362 uint32_t lower = uint32_t(UValue);
1365 Value = Value >> 32;
// 0x3ffff = largest unsigned 18-bit immediate.
1367 if (Value <= 0x3ffff)
1368 return DAG.getConstant(Value, ValueType);
1374 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1375 /// and the value fits into a signed 16-bit constant, and if so, return the
// Same shape as get_vec_u18imm but with a signed 16-bit range check.
// NOTE(review): lines 1385-1386 and 1388 are elided in this listing
// (presumably the upper/lower equality guard before halving Value);
// confirm against the full file.
1377 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1378 MVT::ValueType ValueType) {
1379 if (ConstantSDNode *CN = getVecImm(N)) {
1380 int64_t Value = CN->getSignExtended();
1381 if (ValueType == MVT::i64) {
1382 uint64_t UValue = CN->getValue();
1383 uint32_t upper = uint32_t(UValue >> 32);
1384 uint32_t lower = uint32_t(UValue);
1387 Value = Value >> 32;
// Signed 16-bit range: [-32768, 32767].
1389 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1390 return DAG.getConstant(Value, ValueType);
1397 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1398 /// and the value fits into a signed 10-bit constant, and if so, return the
// Same shape as get_vec_i16imm; the range check is delegated to
// isS10Constant.
// NOTE(review): lines 1408-1409 and 1411 are elided in this listing
// (presumably the upper/lower equality guard before halving Value);
// confirm against the full file.
1400 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1401 MVT::ValueType ValueType) {
1402 if (ConstantSDNode *CN = getVecImm(N)) {
1403 int64_t Value = CN->getSignExtended();
1404 if (ValueType == MVT::i64) {
1405 uint64_t UValue = CN->getValue();
1406 uint32_t upper = uint32_t(UValue >> 32);
1407 uint32_t lower = uint32_t(UValue);
1410 Value = Value >> 32;
1412 if (isS10Constant(Value))
1413 return DAG.getConstant(Value, ValueType);
1419 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1420 /// and the value fits into a signed 8-bit constant, and if so, return the
1423 /// @note: The incoming vector is v16i8 because that's the only way we can load
1424 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// For i16 the two bytes of the splat halfword must match (high byte ==
// low byte) and the low byte is returned; for i8 the value must already
// fit in one byte. Returns an empty SDOperand otherwise.
// NOTE(review): lines 1421-1422, 1425, and the tail after 1436 are elided
// in this listing; confirm against the full file.
1426 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1427 MVT::ValueType ValueType) {
1428 if (ConstantSDNode *CN = getVecImm(N)) {
1429 int Value = (int) CN->getValue();
1430 if (ValueType == MVT::i16
1431 && Value <= 0xffff /* truncated from uint64_t */
1432 && ((short) Value >> 8) == ((short) Value & 0xff))
1433 return DAG.getConstant(Value & 0xff, ValueType);
1434 else if (ValueType == MVT::i8
1435 && (Value & 0xff) == Value)
1436 return DAG.getConstant(Value, ValueType);
1442 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1443 /// and the value fits into a signed 16-bit constant, and if so, return the
// Matches splat values whose low 16 bits are zero (i.e., loadable with a
// single ILHU "immediate load halfword upper") and returns Value >> 16.
// NOTE(review): line 1444 and the tail after 1452 are elided in this
// listing; confirm against the full file.
1445 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1446 MVT::ValueType ValueType) {
1447 if (ConstantSDNode *CN = getVecImm(N)) {
1448 uint64_t Value = CN->getValue();
1449 if ((ValueType == MVT::i32
1450 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1451 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1452 return DAG.getConstant(Value >> 16, ValueType);
1458 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value of N as an i32 constant, or (per the elided
// tail of this listing, presumably) an empty SDOperand when N is not a
// constant splat.
1459 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1460 if (ConstantSDNode *CN = getVecImm(N)) {
1461 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1467 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
// NOTE(review): the doc comment above says get_v4i32_imm but this is
// get_v2i64_imm — a copy/paste slip worth fixing in the full file. Also
// note the (unsigned) cast below truncates the splat to 32 bits before
// widening to an i64 constant — confirm that is intentional.
1468 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1469 if (ConstantSDNode *CN = getVecImm(N)) {
1470 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1476 // If this is a vector of constants or undefs, get the bits. A bit in
1477 // UndefBits is set if the corresponding element of the vector is an
1478 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1479 // zero. Return true if this is not an array of constants, false if it is.
// Packs build-vector BV's elements into two 64-bit words (VectorBits) plus
// a parallel undef mask (UndefBits). Elements are packed most-significant
// first within each word (see SlotNo). FP elements contribute their raw
// IEEE bit images.
// NOTE(review): this listing elides lines 1480, 1485, 1489, 1492, 1497,
// 1505, 1507-1509, 1511-1512, 1515-1517 — including the 'return true' for
// the nonconstant-element path, the 'continue' after recording undef bits,
// and the final 'return false'; confirm against the full file.
1481 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1482 uint64_t UndefBits[2]) {
1483 // Start with zero'd results.
1484 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1486 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1487 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1488 SDOperand OpVal = BV->getOperand(i);
1490 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1491 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1493 uint64_t EltBits = 0;
1494 if (OpVal.getOpcode() == ISD::UNDEF) {
// All-ones mask of EltBitSize bits, shifted into this element's slot.
1495 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1496 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1498 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1499 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1500 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1501 const APFloat &apf = CN->getValueAPF();
1502 EltBits = (CN->getValueType(0) == MVT::f32
1503 ? FloatToBits(apf.convertToFloat())
1504 : DoubleToBits(apf.convertToDouble()));
1506 // Nonconstant element.
1510 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1513 //printf("%llx %llx %llx %llx\n",
1514 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1518 /// If this is a splat (repetition) of a value across the whole vector, return
1519 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1520 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1521 /// SplatSize = 1 byte.
// Works down from 64-bit to 8-bit granularity, at each level folding the
// two halves together (OR for bits, AND for undefs) and comparing them with
// undef bits masked out. MinSplatBits bounds how small a splat the caller
// will accept. Returns false only when the two 64-bit halves differ.
// NOTE(review): this listing elides lines — the parameter line carrying
// MinSplatBits and SplatSize (1524, 1526), the SplatBits/SplatSize
// assignments and 'return true' for each granularity (e.g. 1554-1558,
// 1560-1565, 1567-1571, 1574-1578), and closing braces (1580-1581);
// confirm against the full file.
1522 static bool isConstantSplat(const uint64_t Bits128[2],
1523 const uint64_t Undef128[2],
1525 uint64_t &SplatBits, uint64_t &SplatUndef,
1527 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1528 // the same as the lower 64-bits, ignoring undefs.
1529 uint64_t Bits64 = Bits128[0] | Bits128[1];
1530 uint64_t Undef64 = Undef128[0] & Undef128[1];
1531 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1532 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1533 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1534 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1536 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1537 if (MinSplatBits < 64) {
1539 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1541 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1542 if (MinSplatBits < 32) {
1544 // If the top 16-bits are different than the lower 16-bits, ignoring
1545 // undefs, we have an i32 splat.
1546 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1547 if (MinSplatBits < 16) {
1548 // If the top 8-bits are different than the lower 8-bits, ignoring
1549 // undefs, we have an i16 splat.
1550 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1551 // Otherwise, we have an 8-bit splat.
1552 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1553 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1559 SplatUndef = Undef16;
1566 SplatUndef = Undef32;
1572 SplatBits = Bits128[0];
1573 SplatUndef = Undef128[0];
1579 return false; // Can't be a splat if two pieces don't match.
1582 // If this is a case we can't handle, return null and let the default
1583 // expansion code take care of it. If we CAN select this case, and if it
1584 // selects to a single instruction, return Op. Otherwise, if we can codegen
1585 // this case more efficiently than a constant pool load, lower it to the
1586 // sequence of ops that should be used.
// Custom-lowers constant-splat BUILD_VECTORs per element type: f32/f64
// splats become integer splats BIT_CONVERTed back; i8/i16/i32 splats become
// direct constant BUILD_VECTORs; v2i64 splats with equal 32-bit halves use
// a target constant, otherwise a SHUFB over splatted half-vectors with a
// synthesized shuffle mask (special-casing 0 / 0xffffffff / 0x80000000
// halves, which SHUFB can materialize from mask bytes alone).
// NOTE(review): this listing elides many lines — SplatSize's declaration
// (1596), the switch on VT and all its case labels, several SplatBits
// normalization lines (e.g. 1636/1638's i16 branch heads, 1628's Ops
// declaration), LO32/HI32 declarations (~1663), the HI32/LO32 aliasing
// assignments when only one half is special (1692-1695), and assorted
// closing braces. Confirm any behavioral reading against the full file.
1587 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1588 MVT::ValueType VT = Op.getValueType();
1589 // If this is a vector of constants or undefs, get the bits. A bit in
1590 // UndefBits is set if the corresponding element of the vector is an
1591 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1593 uint64_t VectorBits[2];
1594 uint64_t UndefBits[2];
1595 uint64_t SplatBits, SplatUndef;
1597 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1598 || !isConstantSplat(VectorBits, UndefBits,
1599 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1600 SplatBits, SplatUndef, SplatSize))
1601 return SDOperand(); // Not a constant vector, not a splat.
// --- v4f32 splat: reuse the i32 splat machinery via BIT_CONVERT ---
1606 uint32_t Value32 = SplatBits;
1607 assert(SplatSize == 4
1608 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1609 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1610 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1611 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1612 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// --- v2f64 splat: same trick with i64 ---
1616 uint64_t f64val = SplatBits;
1617 assert(SplatSize == 8
1618 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1619 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1620 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1621 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1622 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// --- v16i8 splat ---
1626 // 8-bit constants have to be expanded to 16-bits
1627 unsigned short Value16 = SplatBits | (SplatBits << 8);
1629 for (int i = 0; i < 8; ++i)
1630 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1631 return DAG.getNode(ISD::BIT_CONVERT, VT,
1632 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// --- v8i16 splat: widen an 8-bit splat to 16 bits if necessary ---
1635 unsigned short Value16;
1637 Value16 = (unsigned short) (SplatBits & 0xffff);
1639 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1640 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1642 for (int i = 0; i < 8; ++i) Ops[i] = T;
1643 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// --- v4i32 splat ---
1646 unsigned int Value = SplatBits;
1647 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1648 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// --- v2i64 splat ---
1651 uint64_t val = SplatBits;
1652 uint32_t upper = uint32_t(val >> 32);
1653 uint32_t lower = uint32_t(val);
1655 if (upper == lower) {
1656 // Magic constant that can be matched by IL, ILA, et. al.
1657 SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1658 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1662 SmallVector<SDOperand, 16> ShufBytes;
1664 bool upper_special, lower_special;
1666 // NOTE: This code creates common-case shuffle masks that can be easily
1667 // detected as common expressions. It is not attempting to create highly
1668 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1670 // Detect if the upper or lower half is a special shuffle mask pattern:
1671 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1672 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1674 // Create lower vector if not a special pattern
1675 if (!lower_special) {
1676 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1677 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1678 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1679 LO32C, LO32C, LO32C, LO32C));
1682 // Create upper vector if not a special pattern
1683 if (!upper_special) {
1684 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1685 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1686 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1687 HI32C, HI32C, HI32C, HI32C));
1690 // If either upper or lower are special, then the two input operands are
1691 // the same (basically, one of them is a "don't care")
1696 if (lower_special && upper_special) {
1697 // Unhappy situation... both upper and lower are special, so punt with
1698 // a target constant:
1699 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1700 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control mask: word-interleaved halves, with
// special bytes 0x80 (zero), 0xc0 (0xff), 0xe0/0x80 (sign bit) for the
// special half patterns — per the SHUFB special mask-byte encodings.
1704 for (int i = 0; i < 4; ++i) {
1705 for (int j = 0; j < 4; ++j) {
1707 bool process_upper, process_lower;
1710 process_upper = (upper_special && (i & 1) == 0);
1711 process_lower = (lower_special && (i & 1) == 1);
1713 if (process_upper || process_lower) {
1714 if ((process_upper && upper == 0)
1715 || (process_lower && lower == 0))
1717 else if ((process_upper && upper == 0xffffffff)
1718 || (process_lower && lower == 0xffffffff))
1720 else if ((process_upper && upper == 0x80000000)
1721 || (process_lower && lower == 0x80000000))
1722 val = (j == 0 ? 0xe0 : 0x80);
1724 val = i * 4 + j + ((i & 1) * 16);
1726 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1730 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1731 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1732 &ShufBytes[0], ShufBytes.size()));
1740 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1741 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1742 /// permutation vector, V3, is monotonically increasing with one "exception"
1743 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1744 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1745 /// In either case, the net result is going to eventually invoke SHUFB to
1746 /// permute/shuffle the bytes from V1 and V2.
1748 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1749 /// control word for byte/halfword/word insertion. This takes care of a single
1750 /// element move from V2 into V1.
1752 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): this listing elides lines — V2Elt's declaration (~1765),
// the V2EltIdx0 assignments per element type (1770/1772/1774-1775), SrcElt's
// declaration and loop increments (1780-1781, 1783-1784, 1786), the
// EltsFromV2 increment and CurrElt advance in the classification loop
// (1788, 1791-1796), and the per-element SrcElt default in the fallback
// loop (1819-1824). Confirm any behavioral reading against the full file.
1753 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1754 SDOperand V1 = Op.getOperand(0);
1755 SDOperand V2 = Op.getOperand(1);
1756 SDOperand PermMask = Op.getOperand(2);
1758 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1760 // If we have a single element being moved from V1 to V2, this can be handled
1761 // using the C*[DX] compute mask instructions, but the vector elements have
1762 // to be monotonically increasing with one exception element.
1763 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1764 unsigned EltsFromV2 = 0;
1766 unsigned V2EltIdx0 = 0;
1767 unsigned CurrElt = 0;
1768 bool monotonic = true;
1769 if (EltVT == MVT::i8)
1771 else if (EltVT == MVT::i16)
1773 else if (EltVT == MVT::i32)
1776 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Classify the mask: count elements sourced from V2 and check the V1
// elements stay monotonically increasing.
1778 for (unsigned i = 0, e = PermMask.getNumOperands();
1779 EltsFromV2 <= 1 && monotonic && i != e;
1782 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1785 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1787 if (SrcElt >= V2EltIdx0) {
1789 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1790 } else if (CurrElt != SrcElt) {
1797 if (EltsFromV2 == 1 && monotonic) {
1798 // Compute mask and shuffle
1799 MachineFunction &MF = DAG.getMachineFunction();
1800 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1801 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1802 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1803 // Initialize temporary register to 0
1804 SDOperand InitTempReg =
1805 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1806 // Copy register's contents as index in INSERT_MASK:
1807 SDOperand ShufMaskOp =
1808 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1809 DAG.getTargetConstant(V2Elt, MVT::i32),
1810 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1811 // Use shuffle mask in SHUFB synthetic instruction:
// Note the operand order: V2 first, then V1.
1812 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1814 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1815 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1817 SmallVector<SDOperand, 16> ResultMask;
1818 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1820 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1823 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1825 for (unsigned j = 0; j < BytesPerElement; ++j) {
1826 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1831 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1832 &ResultMask[0], ResultMask.size());
1833 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lowers ISD::SCALAR_TO_VECTOR: a constant scalar becomes an explicit
// constant BUILD_VECTOR (which later simplifies to a vector load); any other
// scalar is splatted into the vector via the SPUISD::PROMOTE_SCALAR node.
// NOTE(review): this listing elides lines — ConstVecValues'/VT/n_copies
// declarations area (1839, 1843, 1846-1848), the case labels of the second
// switch (1871-1876), and closing braces (1859-1860, 1864, 1867, 1878+);
// confirm against the full file.
1837 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1838 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1840 if (Op0.Val->getOpcode() == ISD::Constant) {
1841 // For a constant, build the appropriate constant vector, which will
1842 // eventually simplify to a vector register load.
1844 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1845 SmallVector<SDOperand, 16> ConstVecValues;
1849 // Create a constant vector:
1850 switch (Op.getValueType()) {
1851 default: assert(0 && "Unexpected constant value type in "
1852 "LowerSCALAR_TO_VECTOR");
1853 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1854 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1855 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1856 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1857 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1858 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
// Replicate the scalar constant across every element.
1861 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1862 for (size_t j = 0; j < n_copies; ++j)
1863 ConstVecValues.push_back(CValue);
1865 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1866 &ConstVecValues[0], ConstVecValues.size());
1868 // Otherwise, copy the value from one register to another:
1869 switch (Op0.getValueType()) {
1870 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1877 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Lower ISD::MUL for vector types by composing SPU partial-product nodes.
// NOTE(review): this is a numbered listing with elided lines (gaps in the
// embedded numbering, e.g. the case labels near 1886/1895/1938 and several
// closing braces). Code lines below are left byte-identical; comments only.
1884 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1885 switch (Op.getValueType()) {
// v4i32 path (case label elided): 32-bit product assembled from
// MPYH(rA,rB) + MPYH(rB,rA) + MPYU(rA,rB) partial products.
1887 SDOperand rA = Op.getOperand(0);
1888 SDOperand rB = Op.getOperand(1);
1889 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1890 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1891 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1892 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1894 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1898 // Multiply two v8i16 vectors (pipeline friendly version):
1899 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1900 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1901 // c) Use SELB to select upper and lower halves from the intermediate results
1903 // NOTE: We really want to move the FSMBI to earlier to actually get the
1904 // dual-issue. This code does manage to do this, even if it's a little on
1907 MachineFunction &MF = DAG.getMachineFunction();
1908 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): Chain and rA are BOTH initialized from getOperand(0) here —
// for a MUL node operand 0 is the LHS value, not a chain. Confirm against
// the original file whether Chain was meant to come from somewhere else.
1909 SDOperand Chain = Op.getOperand(0);
1910 SDOperand rA = Op.getOperand(0);
1911 SDOperand rB = Op.getOperand(1);
1912 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1913 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// FSMBI 0xcccc builds the byte-select mask used by SELB below; pinned to a
// virtual register (FSMBIreg) so it can be scheduled early (see NOTE above).
1916 DAG.getCopyToReg(Chain, FSMBIreg,
1917 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1918 DAG.getConstant(0xcccc, MVT::i32)));
1921 DAG.getCopyToReg(FSMBOp, HiProdReg,
1922 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1924 SDOperand HHProd_v4i32 =
1925 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1926 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// SELB merges the low-half products (MPY) with the shifted high-half
// products, per the mask in FSMBIreg.
1928 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1929 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1930 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1931 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1933 DAG.getConstant(16, MVT::i16))),
1934 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1937 // This M00sE is N@stI! (apologies to Monty Python)
1939 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1940 // is to break it all apart, sign extend, and reassemble the various
1941 // intermediate products.
1943 SDOperand rA = Op.getOperand(0);
1944 SDOperand rB = Op.getOperand(1);
1945 SDOperand c8 = DAG.getConstant(8, MVT::i32);
1946 SDOperand c16 = DAG.getConstant(16, MVT::i32);
// Low-low 8-bit products via a v8i16 multiply of the bitcast operands.
1949 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1950 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1951 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Arithmetic-shift by 8 sign-extends the high byte of each halfword.
1953 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1955 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1958 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1959 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// FSMBI 0x2222 selects alternating bytes for the SELB merge below.
1961 SDOperand FSMBmask = DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1962 DAG.getConstant(0x2222, MVT::i32));
1964 SDOperand LoProdParts =
1965 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1966 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1967 LLProd, LHProd, FSMBmask));
1969 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
// Keep only the low 16 bits of each 32-bit lane of the low products.
1972 DAG.getNode(ISD::AND, MVT::v4i32,
1974 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1975 LoProdMask, LoProdMask,
1976 LoProdMask, LoProdMask));
// High-half operands: arithmetic shift right 16 on the 32-bit lanes.
1979 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1980 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1983 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1984 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1987 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1988 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1991 SDOperand HHProd_1 =
1992 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1993 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1994 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1995 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1996 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1999 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2001 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2005 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
// Final reassembly: OR the high and low partial products, view as v16i8.
2007 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2008 DAG.getNode(ISD::OR, MVT::v4i32,
// Default case (label elided): unknown vector MUL type is a hard error.
2013 cerr << "CellSPU: Unknown vector multiplication, got "
2014 << MVT::getValueTypeString(Op.getValueType())
//! Lower f32 (and, per LowerOperation, v4f32) FDIV via reciprocal estimate
//! plus one refinement step: A/B ~= A*frest(B) refined by fi (FPInterp).
// NOTE(review): numbered listing with elided lines (e.g. the `else` at 2036
// and the closing braces); code lines left byte-identical.
2023 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2024 MachineFunction &MF = DAG.getMachineFunction();
2025 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2027 SDOperand A = Op.getOperand(0);
2028 SDOperand B = Op.getOperand(1);
2029 unsigned VT = Op.getValueType();
2031 unsigned VRegBR, VRegC;
// Scalar f32 uses FP registers; the (elided) else branch handles the
// vector case with VECREG.
2033 if (VT == MVT::f32) {
2034 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2035 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2037 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2038 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2040 // TODO: make sure we're feeding FPInterp the right arguments
2041 // Right now: fi B, frest(B)
2044 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl := refined reciprocal of B, pinned in VRegBR.
2046 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2047 DAG.getNode(SPUISD::FPInterp, VT, B,
2048 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2050 // Computes A * BRcpl and stores in a temporary register
2052 DAG.getCopyToReg(BRcpl, VRegC,
2053 DAG.getNode(ISD::FMUL, VT, A,
2054 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2055 // What's the Chain variable do? It's magic!
2056 // TODO: set Chain = Op(0).getEntryNode()
// One Newton-style correction: q + q'*(A - B*q), with q = A*BRcpl.
2058 return DAG.getNode(ISD::FADD, VT,
2059 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2060 DAG.getNode(ISD::FMUL, VT,
2061 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2062 DAG.getNode(ISD::FSUB, VT, A,
2063 DAG.getNode(ISD::FMUL, VT, B,
2064 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Lower EXTRACT_VECTOR_ELT (constant index only) by shuffling the wanted
//! element into the SPU "preferred slot" and using EXTRACT_ELT0.
// NOTE(review): numbered listing with elided lines (the switch head near
// 2096-2098 and several braces); code lines left byte-identical.
2067 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2068 unsigned VT = Op.getValueType();
2069 SDOperand N = Op.getOperand(0);
2070 SDOperand Elt = Op.getOperand(1);
2071 SDOperand ShufMask[16];
2072 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2074 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2076 int EltNo = (int) C->getValue();
// Range checks per element type (asserts, so debug-build only).
// NOTE(review): the i32/i64 message strings say "> 4"/"> 2" but the
// conditions are >= 4 / >= 2, i.e. the messages are off by one.
2079 if (VT == MVT::i8 && EltNo >= 16)
2080 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2081 else if (VT == MVT::i16 && EltNo >= 8)
2082 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2083 else if (VT == MVT::i32 && EltNo >= 4)
2084 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2085 else if (VT == MVT::i64 && EltNo >= 2)
2086 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2088 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2089 // i32 and i64: Element 0 is the preferred slot
2090 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2093 // Need to generate shuffle mask and extract:
2094 int prefslot_begin = -1, prefslot_end = -1;
2095 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte range per type (switch head elided in this listing):
// i8 -> byte 3; i16 -> bytes 2-3; i32/f32 -> 0-3; i64/f64 -> 0-7.
2099 prefslot_begin = prefslot_end = 3;
2103 prefslot_begin = 2; prefslot_end = 3;
2107 prefslot_begin = 0; prefslot_end = 3;
2111 prefslot_begin = 0; prefslot_end = 7;
2116 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2117 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask routing elt_byte.. into the preferred slot.
2119 for (int i = 0; i < 16; ++i) {
2120 // zero fill uppper part of preferred slot, don't care about the
2122 unsigned int mask_val;
2124 if (i <= prefslot_end) {
2126 ((i < prefslot_begin)
2128 : elt_byte + (i - prefslot_begin));
2130 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
// Past the preferred slot the pattern repeats (don't-care bytes).
2132 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2135 SDOperand ShufMaskVec =
2136 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2138 sizeof(ShufMask) / sizeof(ShufMask[0]));
2140 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2141 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2142 N, N, ShufMaskVec));
//! Lower INSERT_VECTOR_ELT (constant index only): scalar-to-vector the new
//! value, then SHUFB it into place using an INSERT_MASK (cwd-style) control.
// NOTE(review): numbered listing with elided lines (2158-2159, 2162, 2165,
// 2167+ incl. the closing of the expression); code lines left byte-identical.
2146 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2147 SDOperand VecOp = Op.getOperand(0);
2148 SDOperand ValOp = Op.getOperand(1);
2149 SDOperand IdxOp = Op.getOperand(2);
2150 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> asserts on failure and never returns null, so the
// assert below is dead; dyn_cast<> would be needed for the check to mean
// anything.
2152 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2153 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2155 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2156 // Use $2 because it's always 16-byte aligned and it's available:
2157 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// SHUFB(scalar_to_vector(ValOp), <VecOp - elided>, INSERT_MASK(addr)) where
// addr = R2 + byte offset of the constant element index.
2160 DAG.getNode(SPUISD::SHUFB, VT,
2161 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2163 DAG.getNode(SPUISD::INSERT_MASK, VT,
2164 DAG.getNode(ISD::ADD, PtrVT,
2166 DAG.getConstant(CN->getValue(),
//! Lower i8 arithmetic (SUB/ROTL/ROTR/SRL-SHL/SRA/MUL per the elided case
//! labels) by widening operands to i16, doing the op, and truncating back.
// NOTE(review): numbered listing with elided lines (switch head, case
// labels, N1Opc declarations, closing braces); code left byte-identical.
2172 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2174 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2176 assert(Op.getValueType() == MVT::i8);
2179 assert(0 && "Unhandled i8 math operator");
2183 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2185 SDOperand N1 = Op.getOperand(1);
// Constants are re-materialized at i16 rather than wrapped in an extend.
2186 N0 = (N0.getOpcode() != ISD::Constant
2187 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2188 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2189 N1 = (N1.getOpcode() != ISD::Constant
2190 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2191 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2192 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2193 DAG.getNode(Opc, MVT::i16, N0, N1));
// Rotate case (label elided): duplicate the byte into both halves of an
// i16 (ExpandArg) so an i16 rotate behaves like an i8 rotate.
2197 SDOperand N1 = Op.getOperand(1);
2199 N0 = (N0.getOpcode() != ISD::Constant
2200 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2201 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2202 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2203 N1 = (N1.getOpcode() != ISD::Constant
2204 ? DAG.getNode(N1Opc, MVT::i16, N1)
2205 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2206 SDOperand ExpandArg =
2207 DAG.getNode(ISD::OR, MVT::i16, N0,
2208 DAG.getNode(ISD::SHL, MVT::i16,
2209 N0, DAG.getConstant(8, MVT::i16)));
2210 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2211 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Logical shift case (label elided): zero-extend both sides.
2215 SDOperand N1 = Op.getOperand(1);
2217 N0 = (N0.getOpcode() != ISD::Constant
2218 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2219 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2220 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2221 N1 = (N1.getOpcode() != ISD::Constant
2222 ? DAG.getNode(N1Opc, MVT::i16, N1)
2223 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2224 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2225 DAG.getNode(Opc, MVT::i16, N0, N1));
// Arithmetic shift case (label elided): sign-extend the value operand.
2228 SDOperand N1 = Op.getOperand(1);
2230 N0 = (N0.getOpcode() != ISD::Constant
2231 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2232 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2233 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2234 N1 = (N1.getOpcode() != ISD::Constant
2235 ? DAG.getNode(N1Opc, MVT::i16, N1)
2236 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2237 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2238 DAG.getNode(Opc, MVT::i16, N0, N1));
// Multiply case (label elided): sign-extend and multiply at i16.
2241 SDOperand N1 = Op.getOperand(1);
2243 N0 = (N0.getOpcode() != ISD::Constant
2244 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2245 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2246 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2247 N1 = (N1.getOpcode() != ISD::Constant
2248 ? DAG.getNode(N1Opc, MVT::i16, N1)
2249 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2250 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2251 DAG.getNode(Opc, MVT::i16, N0, N1));
//! Lower selected i64 operations (extends and shifts, per the visible case
//! labels) by promoting to a 128-bit vector, operating on quadwords, and
//! extracting element 0 back out.
// NOTE(review): numbered listing with elided lines (the switch head, the
// SHL/SRL case labels, VecVT declaration line 2262, closing braces); code
// lines left byte-identical.
2259 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2261 MVT::ValueType VT = Op.getValueType();
2263 MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
2265 SDOperand Op0 = Op.getOperand(0);
2268 case ISD::ZERO_EXTEND:
2269 case ISD::SIGN_EXTEND:
2270 case ISD::ANY_EXTEND: {
2271 MVT::ValueType Op0VT = Op0.getValueType();
2273 MVT::getVectorType(Op0VT, (128 / MVT::getSizeInBits(Op0VT)));
2275 assert(Op0VT == MVT::i32
2276 && "CellSPU: Zero/sign extending something other than i32");
// Sign extend uses a sign-propagating rotate; zero/any extend uses a
// zero-filling quadword rotate. Both shift by 4 bytes (one i32).
2278 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2279 ? SPUISD::ROTBYTES_RIGHT_S
2280 : SPUISD::ROTQUAD_RZ_BYTES);
2281 SDOperand PromoteScalar =
2282 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2284 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2285 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2286 DAG.getNode(NewOpc, Op0VecVT,
2288 DAG.getConstant(4, MVT::i32))));
// Left-shift case (label elided): split the shift into whole-byte
// (amount >> 3) and residual-bit (amount & 7) quadword shifts.
2292 SDOperand ShiftAmt = Op.getOperand(1);
2293 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2294 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
// Zero everything outside the i64 lane so bytes shifted in are zero.
2295 SDOperand MaskLower =
2296 DAG.getNode(SPUISD::SELB, VecVT,
2298 DAG.getConstant(0, VecVT),
2299 DAG.getNode(SPUISD::FSMBI, VecVT,
2300 DAG.getConstant(0xff00ULL, MVT::i16)));
2301 SDOperand ShiftAmtBytes =
2302 DAG.getNode(ISD::SRL, ShiftAmtVT,
2304 DAG.getConstant(3, ShiftAmtVT));
2305 SDOperand ShiftAmtBits =
2306 DAG.getNode(ISD::AND, ShiftAmtVT,
2308 DAG.getConstant(7, ShiftAmtVT));
2310 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2311 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2312 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2313 MaskLower, ShiftAmtBytes),
// Right-shift case (label elided): same byte/bit split using the
// zero-filling quadword rotates.
2318 unsigned VT = unsigned(Op.getValueType());
2319 SDOperand ShiftAmt = Op.getOperand(1);
2320 unsigned ShiftAmtVT = unsigned(ShiftAmt.getValueType());
2321 SDOperand ShiftAmtBytes =
2322 DAG.getNode(ISD::SRL, ShiftAmtVT,
2324 DAG.getConstant(3, ShiftAmtVT));
2325 SDOperand ShiftAmtBits =
2326 DAG.getNode(ISD::AND, ShiftAmtVT,
2328 DAG.getConstant(7, ShiftAmtVT));
2330 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2331 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2332 Op0, ShiftAmtBytes),
2340 //! Lower byte immediate operations for v16i8 vectors:
// If one operand is a splatted constant BUILD_VECTOR (possibly behind a
// BIT_CONVERT), rebuild it out of i8 *target* constants so the instruction
// selector can match the byte-immediate forms (ANDBI/ORBI/XORBI).
// NOTE(review): numbered listing with elided lines (return type at 2341,
// the Arg/ConstVec declarations, SplatSize, the fall-through return); code
// lines left byte-identical.
2342 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2345 MVT::ValueType VT = Op.getValueType();
// First guess: constant on the left, value on the right...
2347 ConstVec = Op.getOperand(0);
2348 Arg = Op.getOperand(1);
2349 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2350 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2351 ConstVec = ConstVec.getOperand(0);
// ...otherwise swap and look at the other operand.
2353 ConstVec = Op.getOperand(1);
2354 Arg = Op.getOperand(0);
2355 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2356 ConstVec = ConstVec.getOperand(0);
2361 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2362 uint64_t VectorBits[2];
2363 uint64_t UndefBits[2];
2364 uint64_t SplatBits, SplatUndef;
// Only rewrite when the constant is a uniform splat of its element type.
2367 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2368 && isConstantSplat(VectorBits, UndefBits,
2369 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2370 SplatBits, SplatUndef, SplatSize)) {
2371 SDOperand tcVec[16];
2372 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2373 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2375 // Turn the BUILD_VECTOR into a set of target constants:
2376 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same opcode with the target-constant vector operand.
2379 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2380 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2387 //! Lower i32 multiplication
// Scalar i32 MUL from 16x16 partial products:
//   (rA*rB)_32 = MPYH(rA,rB) + MPYH(rB,rA) + MPYU(rA,rB)
// mirroring the v4i32 case of LowerVectorMUL.
// NOTE(review): numbered listing with elided lines (rest of the parameter
// list at 2389, the switch/default handling around 2390-2398, closing
// braces); code lines left byte-identical.
2388 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// Default/unknown type path (context elided): report and fall through.
2392 cerr << "CellSPU: Unknown LowerMUL value type, got "
2393 << MVT::getValueTypeString(Op.getValueType())
// MVT::i32 case (label elided):
2399 SDOperand rA = Op.getOperand(0);
2400 SDOperand rB = Op.getOperand(1);
2402 return DAG.getNode(ISD::ADD, MVT::i32,
2403 DAG.getNode(ISD::ADD, MVT::i32,
2404 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2405 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2406 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2413 //! Custom lowering for CTPOP (count population)
// Doc-comment body (delimiters elided in this listing):
2415 Custom lowering code that counts the number ones in the input
2416 operand. SPU has such an instruction, but it counts the number of
2417 ones per byte, which then have to be accumulated.
// NOTE(review): numbered listing with elided lines (the switch on VT and
// its case labels, closing braces); code lines left byte-identical.
2419 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2420 unsigned VT = Op.getValueType();
2421 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8 case (label elided): one CNTB and extract — no accumulation needed.
2425 SDOperand N = Op.getOperand(0);
2426 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2428 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2429 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2431 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 case (label elided): CNTB, then fold the two byte counts together
// with one shift/add and mask to the 0..16 range (0x0f... see Mask0).
2435 MachineFunction &MF = DAG.getMachineFunction();
2436 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2438 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2440 SDOperand N = Op.getOperand(0);
2441 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2442 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2443 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2445 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2446 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2448 // CNTB_result becomes the chain to which all of the virtual registers
2449 // CNTB_reg, SUM1_reg become associated:
2450 SDOperand CNTB_result =
2451 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2453 SDOperand CNTB_rescopy =
2454 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2456 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2458 return DAG.getNode(ISD::AND, MVT::i16,
2459 DAG.getNode(ISD::ADD, MVT::i16,
2460 DAG.getNode(ISD::SRL, MVT::i16,
// i32 case (label elided): two rounds of shift/add to sum four byte
// counts, masked to 8 bits at the end.
2467 MachineFunction &MF = DAG.getMachineFunction();
2468 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2470 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2471 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2473 SDOperand N = Op.getOperand(0);
2474 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2475 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2476 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2477 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2479 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2480 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2482 // CNTB_result becomes the chain to which all of the virtual registers
2483 // CNTB_reg, SUM1_reg become associated:
2484 SDOperand CNTB_result =
2485 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2487 SDOperand CNTB_rescopy =
2488 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2491 DAG.getNode(ISD::SRL, MVT::i32,
2492 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2495 DAG.getNode(ISD::ADD, MVT::i32,
2496 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2498 SDOperand Sum1_rescopy =
2499 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2502 DAG.getNode(ISD::SRL, MVT::i32,
2503 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2506 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2507 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2509 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2519 /// LowerOperation - Provide custom lowering hooks for some operations.
// Central dispatch: routes each custom-lowered opcode to its Lower* helper.
// NOTE(review): numbered listing with elided lines (return type, switch
// head, many case labels such as LOAD/STORE/Constant/BRCOND/CALL/RET, and
// the trailing return); code lines left byte-identical.
2522 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2524 unsigned Opc = (unsigned) Op.getOpcode();
2525 unsigned VT = (unsigned) Op.getValueType();
// Default case (context elided): diagnostic dump before aborting.
2529 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2530 cerr << "Op.getOpcode() = " << Opc << "\n";
2531 cerr << "*Op.Val:\n";
2538 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2540 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2541 case ISD::ConstantPool:
2542 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2543 case ISD::GlobalAddress:
2544 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2545 case ISD::JumpTable:
2546 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2548 return LowerConstant(Op, DAG);
2549 case ISD::ConstantFP:
2550 return LowerConstantFP(Op, DAG);
2552 return LowerBRCOND(Op, DAG);
2553 case ISD::FORMAL_ARGUMENTS:
2554 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2556 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2558 return LowerRET(Op, DAG, getTargetMachine());
2561 // i8, i64 math ops:
2562 case ISD::ZERO_EXTEND:
2563 case ISD::SIGN_EXTEND:
2564 case ISD::ANY_EXTEND:
// (shift/sub case labels and the `if (VT == MVT::i8)` guard are elided.)
2572 return LowerI8Math(Op, DAG, Opc);
2573 else if (VT == MVT::i64)
2574 return LowerI64Math(Op, DAG, Opc);
2577 // Vector-related lowering.
2578 case ISD::BUILD_VECTOR:
2579 return LowerBUILD_VECTOR(Op, DAG);
2580 case ISD::SCALAR_TO_VECTOR:
2581 return LowerSCALAR_TO_VECTOR(Op, DAG);
2582 case ISD::VECTOR_SHUFFLE:
2583 return LowerVECTOR_SHUFFLE(Op, DAG);
2584 case ISD::EXTRACT_VECTOR_ELT:
2585 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2586 case ISD::INSERT_VECTOR_ELT:
2587 return LowerINSERT_VECTOR_ELT(Op, DAG);
2589 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2593 return LowerByteImmed(Op, DAG);
2595 // Vector and i8 multiply:
2597 if (MVT::isVector(VT))
2598 return LowerVectorMUL(Op, DAG);
2599 else if (VT == MVT::i8)
2600 return LowerI8Math(Op, DAG, Opc);
2602 return LowerMUL(Op, DAG, VT, Opc);
2605 if (VT == MVT::f32 || VT == MVT::v4f32)
2606 return LowerFDIVf32(Op, DAG);
2607 // else if (Op.getValueType() == MVT::f64)
2608 // return LowerFDIVf64(Op, DAG);
2610 assert(0 && "Calling FDIV on unsupported MVT");
2613 return LowerCTPOP(Op, DAG);
2619 //===----------------------------------------------------------------------===//
2620 // Target Optimization Hooks
2621 //===----------------------------------------------------------------------===//
// Target DAG-combine hook: folds constant offsets into SPUindirect
// addresses, drops redundant extend/promote/extract chains, and kills
// degenerate (zero-amount) quadword shifts/rotates.
// NOTE(review): numbered listing with elided lines (return type at ~2623,
// several closing braces, the Result assignments inside some cases, and
// the final return); code lines left byte-identical.
2624 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2627 TargetMachine &TM = getTargetMachine();
2629 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2630 SelectionDAG &DAG = DCI.DAG;
2631 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2632 SDOperand Result; // Initially, NULL result
2634 switch (N->getOpcode()) {
// ISD::ADD case (label elided): fold constants into SPUindirect.
2637 SDOperand Op1 = N->getOperand(1);
2639 if ((Op1.getOpcode() == ISD::Constant
2640 || Op1.getOpcode() == ISD::TargetConstant)
2641 && Op0.getOpcode() == SPUISD::IndirectAddr) {
2642 SDOperand Op01 = Op0.getOperand(1);
2643 if (Op01.getOpcode() == ISD::Constant
2644 || Op01.getOpcode() == ISD::TargetConstant) {
2645 // (add <const>, (SPUindirect <arg>, <const>)) ->
2646 // (SPUindirect <arg>, <const + const>)
2647 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2648 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2649 SDOperand combinedConst =
2650 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2651 Op0.getValueType());
2653 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2654 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2655 DEBUG(cerr << "With: (SPUindirect <arg>, "
2656 << CN0->getValue() + CN1->getValue() << ")\n");
2657 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2658 Op0.getOperand(0), combinedConst);
// Mirror image: constant on the left, SPUindirect on the right.
2660 } else if ((Op0.getOpcode() == ISD::Constant
2661 || Op0.getOpcode() == ISD::TargetConstant)
2662 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2663 SDOperand Op11 = Op1.getOperand(1);
2664 if (Op11.getOpcode() == ISD::Constant
2665 || Op11.getOpcode() == ISD::TargetConstant) {
2666 // (add (SPUindirect <arg>, <const>), <const>) ->
2667 // (SPUindirect <arg>, <const + const>)
2668 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2669 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2670 SDOperand combinedConst =
2671 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2672 Op0.getValueType());
2674 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2675 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2676 DEBUG(cerr << "With: (SPUindirect <arg>, "
2677 << CN0->getValue() + CN1->getValue() << ")\n");
2679 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2680 Op1.getOperand(0), combinedConst);
2685 case ISD::SIGN_EXTEND:
2686 case ISD::ZERO_EXTEND:
2687 case ISD::ANY_EXTEND: {
2688 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2689 N->getValueType(0) == Op0.getValueType()) {
2690 // (any_extend (SPUextract_elt0 <arg>)) ->
2691 // (SPUextract_elt0 <arg>)
2692 // Types must match, however...
2693 DEBUG(cerr << "Replace: ");
2694 DEBUG(N->dump(&DAG));
2695 DEBUG(cerr << "\nWith: ");
2696 DEBUG(Op0.Val->dump(&DAG));
2697 DEBUG(cerr << "\n");
// (the `Result = Op0;` replacement is among the elided lines)
2703 case SPUISD::IndirectAddr: {
2704 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2705 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2706 if (CN->getValue() == 0) {
2707 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2708 // (SPUaform <addr>, 0)
2710 DEBUG(cerr << "Replace: ");
2711 DEBUG(N->dump(&DAG));
2712 DEBUG(cerr << "\nWith: ");
2713 DEBUG(Op0.Val->dump(&DAG));
2714 DEBUG(cerr << "\n");
2721 case SPUISD::SHLQUAD_L_BITS:
2722 case SPUISD::SHLQUAD_L_BYTES:
2723 case SPUISD::VEC_SHL:
2724 case SPUISD::VEC_SRL:
2725 case SPUISD::VEC_SRA:
2726 case SPUISD::ROTQUAD_RZ_BYTES:
2727 case SPUISD::ROTQUAD_RZ_BITS: {
2728 SDOperand Op1 = N->getOperand(1);
2730 if (isa<ConstantSDNode>(Op1)) {
2731 // Kill degenerate vector shifts:
2732 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2734 if (CN->getValue() == 0) {
// (shift-by-zero replacement body elided)
2740 case SPUISD::PROMOTE_SCALAR: {
2741 switch (Op0.getOpcode()) {
2744 case ISD::ANY_EXTEND:
2745 case ISD::ZERO_EXTEND:
2746 case ISD::SIGN_EXTEND: {
2747 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2749 // but only if the SPUpromote_scalar and <arg> types match.
2750 SDOperand Op00 = Op0.getOperand(0);
2751 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2752 SDOperand Op000 = Op00.getOperand(0);
2753 if (Op000.getValueType() == N->getValueType(0)) {
// (Result assignment elided)
2759 case SPUISD::EXTRACT_ELT0: {
2760 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2762 Result = Op0.getOperand(0);
2769 // Otherwise, return unchanged.
2772 DEBUG(cerr << "\nReplace.SPU: ");
2773 DEBUG(N->dump(&DAG));
2774 DEBUG(cerr << "\nWith: ");
2775 DEBUG(Result.Val->dump(&DAG));
2776 DEBUG(cerr << "\n");
2783 //===----------------------------------------------------------------------===//
2784 // Inline Assembly Support
2785 //===----------------------------------------------------------------------===//
2787 /// getConstraintType - Given a constraint letter, return the type of
2788 /// constraint it is for this target.
// NOTE(review): the case labels (the accepted constraint letters) between
// lines 2792 and 2799 are elided from this listing; code left byte-identical.
2789 SPUTargetLowering::ConstraintType
2790 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2791 if (ConstraintLetter.size() == 1) {
2792 switch (ConstraintLetter[0]) {
// The (elided) letters fall through to register-class classification:
2799 return C_RegisterClass;
// Anything else defers to the generic TargetLowering implementation.
2802 return TargetLowering::getConstraintType(ConstraintLetter);
// Map a single-letter inline-asm constraint (GCC RS6000-style letters, per
// the comment below) plus a value type to a concrete SPU register class.
// NOTE(review): numbered listing with elided lines — in particular the case
// labels and the guard between lines 2815 and 2816 (the two adjacent
// returns are almost certainly separated by an elided `if (VT == ...)`);
// code lines left byte-identical.
2805 std::pair<unsigned, const TargetRegisterClass*>
2806 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2807 MVT::ValueType VT) const
2809 if (Constraint.size() == 1) {
2810 // GCC RS6000 Constraint Letters
2811 switch (Constraint[0]) {
// Integer constraint (label elided): 64-bit vs 32-bit register class.
2815 return std::make_pair(0U, SPU::R64CRegisterClass);
2816 return std::make_pair(0U, SPU::R32CRegisterClass);
// FP constraint (label elided): choose by scalar FP width.
2819 return std::make_pair(0U, SPU::R32FPRegisterClass);
2820 else if (VT == MVT::f64)
2821 return std::make_pair(0U, SPU::R64FPRegisterClass);
// General constraint (label elided): generic GPRC class.
2824 return std::make_pair(0U, SPU::GPRCRegisterClass);
2828 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2831 //! Compute used/known bits for a SPU operand
// Reports known-zero/known-one bits for SPU-specific nodes so the generic
// combiner can simplify around them.
// NOTE(review): numbered listing with elided lines (parameter lines
// 2834-2836, default case body, the TODO cases' bodies, closing braces);
// code lines left byte-identical.
2833 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2837 const SelectionDAG &DAG,
2838 unsigned Depth ) const {
2839 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2841 switch (Op.getOpcode()) {
2843 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2853 case SPUISD::PROMOTE_SCALAR: {
2854 SDOperand Op0 = Op.getOperand(0);
2855 uint64_t InMask = MVT::getIntVTBitMask(Op0.getValueType());
// Bits above the promoted scalar's width are zero...
2856 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
// NOTE(review): ...but claiming every in-mask bit is known-ONE looks
// wrong — that asserts the value itself is all ones. Verify against the
// original file / later revisions before relying on this.
2857 KnownOne |= APInt(uint64_sizebits, InMask, false);
2861 case SPUISD::LDRESULT:
2862 case SPUISD::EXTRACT_ELT0:
2863 case SPUISD::EXTRACT_ELT0_CHAINED: {
2864 uint64_t InMask = MVT::getIntVTBitMask(Op.getValueType());
2865 KnownZero |= APInt(uint64_sizebits, ~InMask, false);
// NOTE(review): same suspicious KnownOne |= InMask pattern as above.
2866 KnownOne |= APInt(uint64_sizebits, InMask, false);
// Remaining SPU nodes listed without handling (bodies elided / TODO):
2871 case EXTRACT_I1_ZEXT:
2872 case EXTRACT_I1_SEXT:
2873 case EXTRACT_I8_ZEXT:
2874 case EXTRACT_I8_SEXT:
2879 case SHLQUAD_L_BITS:
2880 case SHLQUAD_L_BYTES:
2886 case ROTQUAD_RZ_BYTES:
2887 case ROTQUAD_RZ_BITS:
2888 case ROTBYTES_RIGHT_S:
2890 case ROTBYTES_LEFT_CHAINED:
2901 // LowerAsmOperandForConstraint
// Thin wrapper: SPU has no target-specific inline-asm operand lowering yet,
// so everything is delegated to the TargetLowering base implementation.
// NOTE(review): return-type line (2902) and closing brace are elided in
// this listing; code lines left byte-identical.
2903 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2904 char ConstraintLetter,
2905 std::vector<SDOperand> &Ops,
2906 SelectionDAG &DAG) {
2907 // Default, for the time being, to the base class handler
2908 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2911 /// isLegalAddressImmediate - Return true if the integer value can be used
2912 /// as the offset of the target addressing mode.
// Accepts the open interval (-2^18, 2^18 - 1): SPU d-form offsets span a
// 256K range. The Ty parameter is unused here.
2913 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2914 // SPU's addresses are 256K:
2915 return (V > -(1 << 18) && V < (1 << 18) - 1);
2918 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {