1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
36 // Used in getTargetNodeName() below
38 std::map<unsigned, const char *> node_names;
40 //! MVT mapping to useful data for Cell SPU
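//! SPU registers are 128 bits wide, and a scalar value of a given type occupies
//! a fixed "preferred slot" within that quadword (for example, bytes 0-3 for a
//! 32-bit value). prefslot_byte records the byte offset of that slot so the
//! load/store lowering below knows how far to rotate a quadword.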
41 struct valtype_map_s {
42   const MVT valtype;
43   const int prefslot_byte;
46 const valtype_map_s valtype_map[] = {
57 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
60 const valtype_map_s *retval = 0;
62 for (size_t i = 0; i < n_valtype_map; ++i) {
63 if (valtype_map[i].valtype == VT) {
64 retval = valtype_map + i;
71 cerr << "getValueTypeMapEntry returns NULL for "
81 //! Predicate that returns true if operand is a memory target
83 \arg Op Operand to test
84 \return true if the operand is a memory target (i.e., global
85 address, external symbol, constant pool) or an A-form
88 bool isMemoryOperand(const SDValue &Op)
90 const unsigned Opc = Op.getOpcode();
91 return (Opc == ISD::GlobalAddress
92 || Opc == ISD::GlobalTLSAddress
93 || Opc == ISD::JumpTable
94 || Opc == ISD::ConstantPool
95 || Opc == ISD::ExternalSymbol
96 || Opc == ISD::TargetGlobalAddress
97 || Opc == ISD::TargetGlobalTLSAddress
98 || Opc == ISD::TargetJumpTable
99 || Opc == ISD::TargetConstantPool
100 || Opc == ISD::TargetExternalSymbol
101 || Opc == SPUISD::AFormAddr);
104 //! Predicate that returns true if the operand is an indirect target
105 bool isIndirectOperand(const SDValue &Op)
107 const unsigned Opc = Op.getOpcode();
108 return (Opc == ISD::Register
109 || Opc == SPUISD::LDRESULT);
113 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
114 : TargetLowering(TM),
117 // Fold away setcc operations if possible.
120 // Use _setjmp/_longjmp instead of setjmp/longjmp.
121 setUseUnderscoreSetJmp(true);
122 setUseUnderscoreLongJmp(true);
124 // Set up the SPU's register classes:
125 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
126 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
127 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
128 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
129 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
130 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
131 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
133 // SPU has no sign or zero extended loads for i1, i8, i16:
134 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
135 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
137 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
138 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
143 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
144 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
146 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
147 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
152 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
153 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
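  // Note: local-store accesses always move full 16-byte quadwords, so the
  // extending loads above are custom lowered as a quadword load followed by a
  // rotate/extract of the addressed element (see LowerLOAD below).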
156 // SPU constant load actions are custom lowered:
157 setOperationAction(ISD::Constant, MVT::i64, Custom);
158 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
159 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
161 // SPU's loads and stores have to be custom lowered:
162 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
164 MVT VT = (MVT::SimpleValueType)sctype;
166 setOperationAction(ISD::LOAD, VT, Custom);
167 setOperationAction(ISD::STORE, VT, Custom);
170 // Custom lower BRCOND for i1, i8 to "promote" the result to
171 // i32 and i16, respectively.
172 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
174 // Expand the jumptable branches
175 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
176 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
177 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
179 // SPU has no intrinsics for these particular operations:
180 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
182   // SPU has no SREM/UREM instructions
183 setOperationAction(ISD::SREM, MVT::i32, Expand);
184 setOperationAction(ISD::UREM, MVT::i32, Expand);
185 setOperationAction(ISD::SREM, MVT::i64, Expand);
186 setOperationAction(ISD::UREM, MVT::i64, Expand);
188 // We don't support sin/cos/sqrt/fmod
189 setOperationAction(ISD::FSIN , MVT::f64, Expand);
190 setOperationAction(ISD::FCOS , MVT::f64, Expand);
191 setOperationAction(ISD::FREM , MVT::f64, Expand);
192 setOperationAction(ISD::FSIN , MVT::f32, Expand);
193 setOperationAction(ISD::FCOS , MVT::f32, Expand);
194 setOperationAction(ISD::FREM , MVT::f32, Expand);
196   // SPU has no hardware square root instruction, so expand FSQRT
197 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
198 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
200 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203   // SPU can do rotate right and left, so legalize them... but custom lower for i8
204   // because no native i8 rotate instructions exist.
205 setOperationAction(ISD::ROTR, MVT::i32, Legal);
206 setOperationAction(ISD::ROTR, MVT::i16, Legal);
207 setOperationAction(ISD::ROTR, MVT::i8, Custom);
208 setOperationAction(ISD::ROTL, MVT::i32, Legal);
209 setOperationAction(ISD::ROTL, MVT::i16, Legal);
210 setOperationAction(ISD::ROTL, MVT::i8, Custom);
211 // SPU has no native version of shift left/right for i8
212 setOperationAction(ISD::SHL, MVT::i8, Custom);
213 setOperationAction(ISD::SRL, MVT::i8, Custom);
214 setOperationAction(ISD::SRA, MVT::i8, Custom);
215 // And SPU needs custom lowering for shift left/right for i64
216 setOperationAction(ISD::SHL, MVT::i64, Custom);
217 setOperationAction(ISD::SRL, MVT::i64, Custom);
218 setOperationAction(ISD::SRA, MVT::i64, Custom);
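  // (The i64 shifts are synthesized from the quadword byte/bit shift nodes,
  // e.g. SPUISD::SHLQUAD_L_BYTES / SHLQUAD_L_BITS, since the SPU shifters
  // operate on word elements and whole quadwords rather than on i64 directly.)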
220 // Custom lower i8, i32 and i64 multiplications
221 setOperationAction(ISD::MUL, MVT::i8, Custom);
222 setOperationAction(ISD::MUL, MVT::i32, Custom);
223 setOperationAction(ISD::MUL, MVT::i64, Custom);
225 // Need to custom handle (some) common i8, i64 math ops
226 setOperationAction(ISD::ADD, MVT::i64, Custom);
227 setOperationAction(ISD::SUB, MVT::i8, Custom);
228 setOperationAction(ISD::SUB, MVT::i64, Custom);
230   // SPU does not have BSWAP, but it does support CTLZ for i32.
231 // CTPOP has to be custom lowered.
232 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
233 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
235 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
236 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
237 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
238 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
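  // (CTPOP is lowered using SPUISD::CNTB, which counts set bits per byte; the
  // per-byte counts are then summed out to the requested width.)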
240 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
241 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
243 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
245 // SPU has a version of select that implements (a&~c)|(b&c), just like
246 // select ought to work:
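  // (SELB computes (a & ~mask) | (b & mask) bitwise, so i8/i16/i32 selects can
  // be emitted directly once the condition is turned into a full-width mask.)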
247 setOperationAction(ISD::SELECT, MVT::i1, Promote);
248 setOperationAction(ISD::SELECT, MVT::i8, Legal);
249 setOperationAction(ISD::SELECT, MVT::i16, Legal);
250 setOperationAction(ISD::SELECT, MVT::i32, Legal);
251 setOperationAction(ISD::SELECT, MVT::i64, Expand);
253 setOperationAction(ISD::SETCC, MVT::i1, Promote);
254 setOperationAction(ISD::SETCC, MVT::i8, Legal);
255 setOperationAction(ISD::SETCC, MVT::i16, Legal);
256 setOperationAction(ISD::SETCC, MVT::i32, Legal);
257 setOperationAction(ISD::SETCC, MVT::i64, Expand);
259   // Zero extension and sign extension for i64 have to be custom lowered:
261 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
262 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
263 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
265 // SPU has a legal FP -> signed INT instruction
266 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
267 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
268 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
269 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
271 // FDIV on SPU requires custom lowering
272 setOperationAction(ISD::FDIV, MVT::f32, Custom);
273 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
275 // SPU has [U|S]INT_TO_FP
276 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
277 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
278 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
279 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
280 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
281 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
285 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
286 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
287 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
288 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
290 // We cannot sextinreg(i1). Expand to shifts.
291 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
293 // Support label based line numbers.
294 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
295 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
297 // We want to legalize GlobalAddress and ConstantPool nodes into the
298 // appropriate instructions to materialize the address.
299 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
301 MVT VT = (MVT::SimpleValueType)sctype;
303 setOperationAction(ISD::GlobalAddress, VT, Custom);
304 setOperationAction(ISD::ConstantPool, VT, Custom);
305 setOperationAction(ISD::JumpTable, VT, Custom);
308 // RET must be custom lowered, to meet ABI requirements
309 setOperationAction(ISD::RET, MVT::Other, Custom);
311 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
312 setOperationAction(ISD::VASTART , MVT::Other, Custom);
314 // Use the default implementation.
315 setOperationAction(ISD::VAARG , MVT::Other, Expand);
316 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
317 setOperationAction(ISD::VAEND , MVT::Other, Expand);
318 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
319 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
320 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
321 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
323 // Cell SPU has instructions for converting between i64 and fp.
324 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
325 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
327 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
328 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
330 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
331 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
333 // First set operation action for all vector types to expand. Then we
334 // will selectively turn on ones that can be effectively codegen'd.
335 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
336 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
337 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
338 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
339 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
342 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
343 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
344 MVT VT = (MVT::SimpleValueType)i;
346 // add/sub are legal for all supported vector VT's.
347 setOperationAction(ISD::ADD , VT, Legal);
348 setOperationAction(ISD::SUB , VT, Legal);
349 // mul has to be custom lowered.
350 setOperationAction(ISD::MUL , VT, Custom);
352 setOperationAction(ISD::AND , VT, Legal);
353 setOperationAction(ISD::OR , VT, Legal);
354 setOperationAction(ISD::XOR , VT, Legal);
355 setOperationAction(ISD::LOAD , VT, Legal);
356 setOperationAction(ISD::SELECT, VT, Legal);
357 setOperationAction(ISD::STORE, VT, Legal);
359 // These operations need to be expanded:
360 setOperationAction(ISD::SDIV, VT, Expand);
361 setOperationAction(ISD::SREM, VT, Expand);
362 setOperationAction(ISD::UDIV, VT, Expand);
363 setOperationAction(ISD::UREM, VT, Expand);
364 setOperationAction(ISD::FDIV, VT, Custom);
366 // Custom lower build_vector, constant pool spills, insert and
367 // extract vector elements:
368 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
369 setOperationAction(ISD::ConstantPool, VT, Custom);
370 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
371 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
372 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
373 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
376 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
377 setOperationAction(ISD::AND, MVT::v16i8, Custom);
378 setOperationAction(ISD::OR, MVT::v16i8, Custom);
379 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
380 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
382 setShiftAmountType(MVT::i32);
383 setSetCCResultContents(ZeroOrOneSetCCResult);
385 setStackPointerRegisterToSaveRestore(SPU::R1);
387 // We have target-specific dag combine patterns for the following nodes:
388 setTargetDAGCombine(ISD::ADD);
389 setTargetDAGCombine(ISD::ZERO_EXTEND);
390 setTargetDAGCombine(ISD::SIGN_EXTEND);
391 setTargetDAGCombine(ISD::ANY_EXTEND);
393 computeRegisterProperties();
397 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
399 if (node_names.empty()) {
400 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
401 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
402 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
403 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
404 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
405 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
406 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
407 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
408 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
409 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
410 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
411 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
412 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
413 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
414 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
415 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
416 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
417 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
418 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
419 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
420 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
421 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
422 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
423 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
424 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
425 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
426 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
427 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
428 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
429 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
430 "SPUISD::ROTQUAD_RZ_BYTES";
431 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
432 "SPUISD::ROTQUAD_RZ_BITS";
433 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
434 "SPUISD::ROTBYTES_RIGHT_S";
435 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
436 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
437 "SPUISD::ROTBYTES_LEFT_CHAINED";
438 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
439 "SPUISD::ROTBYTES_LEFT_BITS";
440 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
441 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
442 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
443 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
444 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
445 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
446 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
447 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
448 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
451 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
453 return ((i != node_names.end()) ? i->second : 0);
456 MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
457 MVT VT = Op.getValueType();
464 //===----------------------------------------------------------------------===//
465 // Calling convention code:
466 //===----------------------------------------------------------------------===//
468 #include "SPUGenCallingConv.inc"
470 //===----------------------------------------------------------------------===//
471 // LowerOperation implementation
472 //===----------------------------------------------------------------------===//
474 /// Aligned load common code for CellSPU
476 \param[in] Op The SelectionDAG load or store operand
477 \param[in] DAG The selection DAG
478 \param[in] ST CellSPU subtarget information structure
479 \param[in,out] alignment Caller initializes this to the load or store node's
480 value from getAlignment(), may be updated while generating the aligned load
481 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
482 offset (divisible by 16, modulo 16 == 0)
483 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
484 offset of the preferred slot (modulo 16 != 0)
485 \param[in,out] VT Caller initializes this value type to the load or store
486 node's loaded or stored value type; may be updated if an i1-extended load or
487 store is encountered.
488 \param[out] was16aligned true if the base pointer had 16-byte alignment,
489 otherwise false. Can help to determine if the chunk needs to be rotated.
491 Both load and store lowering load a block of data aligned on a 16-byte
492 boundary. This is the common aligned load code shared between both.
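
Illustrative example: for a constant byte offset off, the quadword is fetched
from (off & ~0xf) and prefSlotOffs becomes (off & 0xf) - prefslot_byte, i.e.
the number of bytes the loaded quadword must later be rotated so the addressed
value lands in its preferred slot.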
495 AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
496             LSBaseSDNode *LSN,
497             unsigned &alignment, int &alignOffs, int &prefSlotOffs,
498 MVT &VT, bool &was16aligned)
500 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
501 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
502 SDValue basePtr = LSN->getBasePtr();
503 SDValue chain = LSN->getChain();
505 if (basePtr.getOpcode() == ISD::ADD) {
506 SDValue Op1 = basePtr.getNode()->getOperand(1);
508 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
509 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
511 alignOffs = (int) CN->getValue();
512 prefSlotOffs = (int) (alignOffs & 0xf);
514 // Adjust the rotation amount to ensure that the final result ends up in
515 // the preferred slot:
516 prefSlotOffs -= vtm->prefslot_byte;
517 basePtr = basePtr.getOperand(0);
519 // Loading from memory, can we adjust alignment?
520 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
521 SDValue APtr = basePtr.getOperand(0);
522 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
523 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
524 alignment = GSDN->getGlobal()->getAlignment();
529 prefSlotOffs = -vtm->prefslot_byte;
531 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
532 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
533 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
534 prefSlotOffs = (int) (alignOffs & 0xf);
535 prefSlotOffs -= vtm->prefslot_byte;
536 basePtr = DAG.getRegister(SPU::R1, VT);
539 prefSlotOffs = -vtm->prefslot_byte;
542 if (alignment == 16) {
543 // Realign the base pointer as a D-Form address:
544 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
545 basePtr = DAG.getNode(ISD::ADD, PtrVT,
547 DAG.getConstant((alignOffs & ~0xf), PtrVT));
550 // Emit the vector load:
552 return DAG.getLoad(MVT::v16i8, chain, basePtr,
553 LSN->getSrcValue(), LSN->getSrcValueOffset(),
554 LSN->isVolatile(), 16);
557 // Unaligned load or we're using the "large memory" model, which means that
558 // we have to be very pessimistic:
559 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
560 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
564 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
565 DAG.getConstant((alignOffs & ~0xf), PtrVT));
566 was16aligned = false;
567 return DAG.getLoad(MVT::v16i8, chain, basePtr,
568 LSN->getSrcValue(), LSN->getSrcValueOffset(),
569 LSN->isVolatile(), 16);
572 /// Custom lower loads for CellSPU
574 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
575 within a 16-byte block, we have to rotate to extract the requested element.
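
Roughly, the lowering is: load the containing v16i8 quadword, rotate it left
(SPUISD::ROTBYTES_LEFT_CHAINED) when the value is not already in the preferred
slot or the pointer is not known to be 16-byte aligned, then extract element 0
as the scalar result (SPUISD::EXTRACT_ELT0_CHAINED), sign- or zero-extending
i1/i8 results as required.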
578 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
579 LoadSDNode *LN = cast<LoadSDNode>(Op);
580 SDValue the_chain = LN->getChain();
581 MVT VT = LN->getMemoryVT();
582 MVT OpVT = Op.getNode()->getValueType(0);
583 ISD::LoadExtType ExtType = LN->getExtensionType();
584 unsigned alignment = LN->getAlignment();
587 switch (LN->getAddressingMode()) {
588 case ISD::UNINDEXED: {
592     AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT, was16aligned);
594 if (result.getNode() == 0)
597 the_chain = result.getValue(1);
598 // Rotate the chunk if necessary
601 if (rotamt != 0 || !was16aligned) {
602 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
607 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
609 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
610 LoadSDNode *LN1 = cast<LoadSDNode>(result);
611 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
612 DAG.getConstant(rotamt, PtrVT));
615 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
616 the_chain = result.getValue(1);
619 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
621 MVT vecVT = MVT::v16i8;
623 // Convert the loaded v16i8 vector to the appropriate vector type
624 // specified by the operand:
627 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
629 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
632 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
633 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
634 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
635 the_chain = result.getValue(1);
637 // Handle the sign and zero-extending loads for i1 and i8:
640 if (ExtType == ISD::SEXTLOAD) {
641 NewOpC = (OpVT == MVT::i1
642 ? SPUISD::EXTRACT_I1_SEXT
643 : SPUISD::EXTRACT_I8_SEXT);
645 assert(ExtType == ISD::ZEXTLOAD);
646 NewOpC = (OpVT == MVT::i1
647 ? SPUISD::EXTRACT_I1_ZEXT
648 : SPUISD::EXTRACT_I8_ZEXT);
651 result = DAG.getNode(NewOpC, OpVT, result);
654 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
655 SDValue retops[2] = {
660 result = DAG.getNode(SPUISD::LDRESULT, retvts,
661 retops, sizeof(retops) / sizeof(retops[0]));
668 case ISD::LAST_INDEXED_MODE:
669 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
671 cerr << (unsigned) LN->getAddressingMode() << "\n";
679 /// Custom lower stores for CellSPU
681 All CellSPU stores are aligned to 16-byte boundaries, so for elements
682 within a 16-byte block, we have to generate a shuffle to insert the
683 requested element into its place, then store the resulting block.
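
Roughly: the containing quadword is re-fetched with AlignedLoad, an insertion
control mask is built with SPUISD::INSERT_MASK (selected to one of the C*D
instructions), SPUISD::SHUFB merges the new element into the quadword, and the
merged quadword is stored back.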
686 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
687 StoreSDNode *SN = cast<StoreSDNode>(Op);
688 SDValue Value = SN->getValue();
689 MVT VT = Value.getValueType();
690 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
691 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
692 unsigned alignment = SN->getAlignment();
694 switch (SN->getAddressingMode()) {
695 case ISD::UNINDEXED: {
696 int chunk_offset, slot_offset;
699 // The vector type we really want to load from the 16-byte chunk, except
700 // in the case of MVT::i1, which has to be v16i8.
701 MVT vecVT, stVecVT = MVT::v16i8;
704 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
705 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
707 SDValue alignLoadVec =
708 AlignedLoad(Op, DAG, ST, SN, alignment,
709 chunk_offset, slot_offset, VT, was16aligned);
711 if (alignLoadVec.getNode() == 0)
714 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
715 SDValue basePtr = LN->getBasePtr();
716 SDValue the_chain = alignLoadVec.getValue(1);
717 SDValue theValue = SN->getValue();
721 && (theValue.getOpcode() == ISD::AssertZext
722 || theValue.getOpcode() == ISD::AssertSext)) {
723 // Drill down and get the value for zero- and sign-extended
725 theValue = theValue.getOperand(0);
730 SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
731 SDValue insertEltPtr;
734 // If the base pointer is already a D-form address, then just create
735   // a new D-form address with a slot offset and the original base pointer.
736 // Otherwise generate a D-form address with the slot offset relative
737 // to the stack pointer, which is always aligned.
738 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
739 DEBUG(basePtr.getNode()->dump(&DAG));
742 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
743 (basePtr.getOpcode() == ISD::ADD
744 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
745 insertEltPtr = basePtr;
747 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
750 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
751 result = DAG.getNode(SPUISD::SHUFB, vecVT,
752 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
754 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
756 result = DAG.getStore(the_chain, result, basePtr,
757 LN->getSrcValue(), LN->getSrcValueOffset(),
758 LN->isVolatile(), LN->getAlignment());
767 case ISD::LAST_INDEXED_MODE:
768     cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
770 cerr << (unsigned) SN->getAddressingMode() << "\n";
778 /// Generate the address of a constant pool entry.
780 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
781 MVT PtrVT = Op.getValueType();
782 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
783 Constant *C = CP->getConstVal();
784 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
785 SDValue Zero = DAG.getConstant(0, PtrVT);
786 const TargetMachine &TM = DAG.getTarget();
788 if (TM.getRelocationModel() == Reloc::Static) {
789 if (!ST->usingLargeMem()) {
790 // Just return the SDValue with the constant pool address in it.
791 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
793 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
794 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
795 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
800 "LowerConstantPool: Relocation model other than static not supported.");
805 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
806 MVT PtrVT = Op.getValueType();
807 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
808 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
809 SDValue Zero = DAG.getConstant(0, PtrVT);
810 const TargetMachine &TM = DAG.getTarget();
812 if (TM.getRelocationModel() == Reloc::Static) {
813 if (!ST->usingLargeMem()) {
814 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
816 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
817 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
818 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
823 "LowerJumpTable: Relocation model other than static not supported.");
828 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
829 MVT PtrVT = Op.getValueType();
830 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
831 GlobalValue *GV = GSDN->getGlobal();
832 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
833 const TargetMachine &TM = DAG.getTarget();
834 SDValue Zero = DAG.getConstant(0, PtrVT);
836 if (TM.getRelocationModel() == Reloc::Static) {
837 if (!ST->usingLargeMem()) {
838 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
840 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
841 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
842 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
845 cerr << "LowerGlobalAddress: Relocation model other than static not "
854 //! Custom lower i64 integer constants
856 This code inserts all of the necessary juggling that needs to occur to load
857 a 64-bit constant into a register.
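
Roughly: the constant is splatted into a v2i64 BUILD_VECTOR and element 0 is
then extracted with SPUISD::EXTRACT_ELT0, so the usual vector constant
materialization patterns (IL, ILA and friends) can do the work.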
860 LowerConstant(SDValue Op, SelectionDAG &DAG) {
861 MVT VT = Op.getValueType();
862 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
864 if (VT == MVT::i64) {
865 SDValue T = DAG.getConstant(CN->getValue(), MVT::i64);
866 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
867 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
869 cerr << "LowerConstant: unhandled constant type "
879 //! Custom lower double precision floating point constants
881 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
882 MVT VT = Op.getValueType();
883 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
886 "LowerConstantFP: Node is not ConstantFPSDNode");
888 if (VT == MVT::f64) {
889 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
890 return DAG.getNode(ISD::BIT_CONVERT, VT,
891 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
897 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
899 LowerBRCOND(SDValue Op, SelectionDAG &DAG)
901 SDValue Cond = Op.getOperand(1);
902 MVT CondVT = Cond.getValueType();
905 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
906 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
907 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
909 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
912 return SDValue(); // Unchanged
916 LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
918 MachineFunction &MF = DAG.getMachineFunction();
919 MachineFrameInfo *MFI = MF.getFrameInfo();
920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
921 SmallVector<SDValue, 8> ArgValues;
922 SDValue Root = Op.getOperand(0);
923 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
925 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
926 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
928 unsigned ArgOffset = SPUFrameInfo::minStackSize();
929 unsigned ArgRegIdx = 0;
930 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
932 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
934 // Add DAG nodes to load the arguments or copy them out of registers.
935 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e; ++ArgNo) {
937 bool needsLoad = false;
938 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
939 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
941 switch (ObjectVT.getSimpleVT()) {
943 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
944 << ObjectVT.getMVTString()
949 if (!isVarArg && ArgRegIdx < NumArgRegs) {
950 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
951 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
952 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
959 if (!isVarArg && ArgRegIdx < NumArgRegs) {
960 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
961 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
962 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
969 if (!isVarArg && ArgRegIdx < NumArgRegs) {
970 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
971 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
972 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
979 if (!isVarArg && ArgRegIdx < NumArgRegs) {
980 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
981 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
982 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
989 if (!isVarArg && ArgRegIdx < NumArgRegs) {
990 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
991 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
992 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
999 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1000 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1001 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1002 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
1014 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1015 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1016 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1017 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1025 // We need to load the argument to a virtual register if we determined above
1026 // that we ran out of physical registers of the appropriate type
1028 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1029 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1030 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1031 ArgOffset += StackSlotSize;
1034 ArgValues.push_back(ArgVal);
1037   // If the function takes a variable number of arguments, make a frame index for
1038 // the start of the first vararg value... for expansion of llvm.va_start.
1040 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1042 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1043 // If this function is vararg, store any remaining integer argument regs to
1044   // their spots on the stack so that they may be loaded by dereferencing the
1045 // result of va_next.
1046 SmallVector<SDValue, 8> MemOps;
1047 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1048 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1049 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1050 SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1051 SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1052 MemOps.push_back(Store);
1053 // Increment the address by four for the next argument to store
1054 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1055 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
1057 if (!MemOps.empty())
1058 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1061 ArgValues.push_back(Root);
1063 // Return the new list of results.
1064 return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
1068 /// isLSAAddress - Return the immediate to use if the specified
1069 /// value is representable as an LSA address.
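/// (The address must be word aligned and fit in an 18-bit signed value, i.e.
/// its top 14 bits must be a sign extension of bit 17; the returned immediate
/// is the address divided by 4.)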
1070 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1071 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1074 int Addr = C->getValue();
1075 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1076 (Addr << 14 >> 14) != Addr)
1077 return 0; // Top 14 bits have to be sext of immediate.
1079 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).getNode();
1084 LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1085 SDValue Chain = Op.getOperand(0);
1087 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1088 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1090 SDValue Callee = Op.getOperand(4);
1091 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1092 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1093 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1094 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1096 // Handy pointer type
1097 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1099 // Accumulate how many bytes are to be pushed on the stack, including the
1100 // linkage area, and parameter passing area. According to the SPU ABI,
1101 // we minimally need space for [LR] and [SP]
1102 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1104 // Set up a copy of the stack pointer for use loading and storing any
1105   // arguments that may not fit in the registers available for argument passing.
1107 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1109   // Figure out which arguments are going to go in registers, and which in memory.
1111 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1112 unsigned ArgRegIdx = 0;
1114 // Keep track of registers passing arguments
1115 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1116 // And the arguments passed on the stack
1117 SmallVector<SDValue, 8> MemOpChains;
1119 for (unsigned i = 0; i != NumOps; ++i) {
1120 SDValue Arg = Op.getOperand(5+2*i);
1122 // PtrOff will be used to store the current argument to the stack if a
1123 // register cannot be found for it.
1124 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1125 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1127 switch (Arg.getValueType().getSimpleVT()) {
1128 default: assert(0 && "Unexpected ValueType for argument!");
1132 if (ArgRegIdx != NumArgRegs) {
1133 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1135 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1136 ArgOffset += StackSlotSize;
1141 if (ArgRegIdx != NumArgRegs) {
1142 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1144 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1145 ArgOffset += StackSlotSize;
1152 if (ArgRegIdx != NumArgRegs) {
1153 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1155 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1156 ArgOffset += StackSlotSize;
1162 // Update number of stack bytes actually used, insert a call sequence start
1163 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1164 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1166 if (!MemOpChains.empty()) {
1167 // Adjust the stack pointer for the stack arguments.
1168 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1169 &MemOpChains[0], MemOpChains.size());
1172 // Build a sequence of copy-to-reg nodes chained together with token chain
1173 // and flag operands which copy the outgoing args into the appropriate regs.
1175 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1176 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1178 InFlag = Chain.getValue(1);
1181 SmallVector<SDValue, 8> Ops;
1182 unsigned CallOpc = SPUISD::CALL;
1184 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1185 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1186 // node so that legalize doesn't hack it.
1187 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1188 GlobalValue *GV = G->getGlobal();
1189 MVT CalleeVT = Callee.getValueType();
1190 SDValue Zero = DAG.getConstant(0, PtrVT);
1191 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1193 if (!ST->usingLargeMem()) {
1194 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1195 // style calls, otherwise, external symbols are BRASL calls. This assumes
1196 // that declared/defined symbols are in the same compilation unit and can
1197 // be reached through PC-relative jumps.
1200       // This may be an unsafe assumption for JIT and really large compilation units.
1202 if (GV->isDeclaration()) {
1203 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1205 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1208       // "Large memory" mode: Turn all calls into indirect calls with an X-form address.
1210 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1212 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1213 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1214 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1215 // If this is an absolute destination address that appears to be a legal
1216 // local store address, use the munged value.
1217 Callee = SDValue(Dest, 0);
1220 Ops.push_back(Chain);
1221 Ops.push_back(Callee);
1223   // Add argument registers to the end of the list so that they are known live into the call.
1225 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1226 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1227 RegsToPass[i].second.getValueType()));
1229 if (InFlag.getNode())
1230 Ops.push_back(InFlag);
1231 // Returns a chain and a flag for retval copy to use.
1232 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
1233 &Ops[0], Ops.size());
1234 InFlag = Chain.getValue(1);
1236 Chain = DAG.getCALLSEQ_END(Chain,
1237 DAG.getConstant(NumStackBytes, PtrVT),
1238 DAG.getConstant(0, PtrVT),
1240 if (Op.getNode()->getValueType(0) != MVT::Other)
1241 InFlag = Chain.getValue(1);
1243 SDValue ResultVals[3];
1244 unsigned NumResults = 0;
1246 // If the call has results, copy the values out of the ret val registers.
1247 switch (Op.getNode()->getValueType(0).getSimpleVT()) {
1248 default: assert(0 && "Unexpected ret value!");
1249 case MVT::Other: break;
1251 if (Op.getNode()->getValueType(1) == MVT::i32) {
1252 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1253 ResultVals[0] = Chain.getValue(0);
1254 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1255 Chain.getValue(2)).getValue(1);
1256 ResultVals[1] = Chain.getValue(0);
1259 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1260 ResultVals[0] = Chain.getValue(0);
1265 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1266 ResultVals[0] = Chain.getValue(0);
1271 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1272 InFlag).getValue(1);
1273 ResultVals[0] = Chain.getValue(0);
1281 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.getNode()->getValueType(0),
1282 InFlag).getValue(1);
1283 ResultVals[0] = Chain.getValue(0);
1288 // If the function returns void, just return the chain.
1289 if (NumResults == 0)
1292 // Otherwise, merge everything together with a MERGE_VALUES node.
1293 ResultVals[NumResults++] = Chain;
1294 SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
1295 return Res.getValue(Op.getResNo());
1299 LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1300 SmallVector<CCValAssign, 16> RVLocs;
1301 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1302 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1303 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1304 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1306 // If this is the first return lowered for this function, add the regs to the
1307 // liveout set for the function.
1308 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1309 for (unsigned i = 0; i != RVLocs.size(); ++i)
1310 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1313 SDValue Chain = Op.getOperand(0);
1316 // Copy the result values into the output registers.
1317 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1318 CCValAssign &VA = RVLocs[i];
1319 assert(VA.isRegLoc() && "Can only return in registers!");
1320 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1321 Flag = Chain.getValue(1);
1325 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1327 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1331 //===----------------------------------------------------------------------===//
1332 // Vector related lowering:
1333 //===----------------------------------------------------------------------===//
1335 static ConstantSDNode *
1336 getVecImm(SDNode *N) {
1337 SDValue OpVal(0, 0);
1339 // Check to see if this buildvec has a single non-undef value in its elements.
1340 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1341 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1342 if (OpVal.getNode() == 0)
1343 OpVal = N->getOperand(i);
1344 else if (OpVal != N->getOperand(i))
1348 if (OpVal.getNode() != 0) {
1349 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1354   return 0; // All UNDEF: use implicit def; not a Constant node
1357 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1358 /// and the value fits into an unsigned 18-bit constant, and if so, return the
1359 /// constant.
1360 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1362 if (ConstantSDNode *CN = getVecImm(N)) {
1363 uint64_t Value = CN->getValue();
1364 if (ValueType == MVT::i64) {
1365 uint64_t UValue = CN->getValue();
1366 uint32_t upper = uint32_t(UValue >> 32);
1367 uint32_t lower = uint32_t(UValue);
1370 Value = Value >> 32;
1372 if (Value <= 0x3ffff)
1373 return DAG.getConstant(Value, ValueType);
1379 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1380 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1382 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1384 if (ConstantSDNode *CN = getVecImm(N)) {
1385 int64_t Value = CN->getSignExtended();
1386 if (ValueType == MVT::i64) {
1387 uint64_t UValue = CN->getValue();
1388 uint32_t upper = uint32_t(UValue >> 32);
1389 uint32_t lower = uint32_t(UValue);
1392 Value = Value >> 32;
1394 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1395 return DAG.getConstant(Value, ValueType);
1402 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1403 /// and the value fits into a signed 10-bit constant, and if so, return the constant.
1405 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1407 if (ConstantSDNode *CN = getVecImm(N)) {
1408 int64_t Value = CN->getSignExtended();
1409 if (ValueType == MVT::i64) {
1410 uint64_t UValue = CN->getValue();
1411 uint32_t upper = uint32_t(UValue >> 32);
1412 uint32_t lower = uint32_t(UValue);
1415 Value = Value >> 32;
1417 if (isS10Constant(Value))
1418 return DAG.getConstant(Value, ValueType);
1424 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1425 /// and the value fits into a signed 8-bit constant, and if so, return the constant.
1428 /// @note: The incoming vector is v16i8 because that's the only way we can load
1429 /// constant vectors. Thus, we test to see if the upper and lower bytes are the same.
1431 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 int Value = (int) CN->getValue();
1435 if (ValueType == MVT::i16
1436 && Value <= 0xffff /* truncated from uint64_t */
1437 && ((short) Value >> 8) == ((short) Value & 0xff))
1438 return DAG.getConstant(Value & 0xff, ValueType);
1439 else if (ValueType == MVT::i8
1440 && (Value & 0xff) == Value)
1441 return DAG.getConstant(Value, ValueType);
1447 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1448 /// and the value fits into a signed 16-bit constant, and if so, return the constant.
1450 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1452 if (ConstantSDNode *CN = getVecImm(N)) {
1453 uint64_t Value = CN->getValue();
1454 if ((ValueType == MVT::i32
1455 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1456 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1457 return DAG.getConstant(Value >> 16, ValueType);
1463 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1464 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1465 if (ConstantSDNode *CN = getVecImm(N)) {
1466 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1472 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1473 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1474 if (ConstantSDNode *CN = getVecImm(N)) {
1475     return DAG.getConstant(CN->getValue(), MVT::i64);
1481 // If this is a vector of constants or undefs, get the bits. A bit in
1482 // UndefBits is set if the corresponding element of the vector is an
1483 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1484 // zero. Return true if this is not an array of constants, false if it is.
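//
// Illustrative example: for the v4i32 build_vector <1, 2, 3, 4>, element 0 is
// packed into the high 32 bits of VectorBits[0], yielding
// VectorBits[0] = 0x0000000100000002, VectorBits[1] = 0x0000000300000004,
// and UndefBits[0] = UndefBits[1] = 0.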
1486 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1487 uint64_t UndefBits[2]) {
1488 // Start with zero'd results.
1489 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1491 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1492 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1493 SDValue OpVal = BV->getOperand(i);
1495     unsigned PartNo = i >= e/2; // In the upper 64 bits?
1496 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1498 uint64_t EltBits = 0;
1499 if (OpVal.getOpcode() == ISD::UNDEF) {
1500 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1501 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1503 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1504 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1505 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1506 const APFloat &apf = CN->getValueAPF();
1507 EltBits = (CN->getValueType(0) == MVT::f32
1508 ? FloatToBits(apf.convertToFloat())
1509 : DoubleToBits(apf.convertToDouble()));
1511 // Nonconstant element.
1515 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1518 //printf("%llx %llx %llx %llx\n",
1519 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1523 /// If this is a splat (repetition) of a value across the whole vector, return
1524 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1525 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1526 /// SplatSize = 1 byte.
1527 static bool isConstantSplat(const uint64_t Bits128[2],
1528 const uint64_t Undef128[2],
1530 uint64_t &SplatBits, uint64_t &SplatUndef,
1532 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1533 // the same as the lower 64-bits, ignoring undefs.
1534 uint64_t Bits64 = Bits128[0] | Bits128[1];
1535 uint64_t Undef64 = Undef128[0] & Undef128[1];
1536 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1537 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1538 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1539 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1541 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1542 if (MinSplatBits < 64) {
1544       // Check that the top 32-bits are the same as the lower 32-bits, ignoring undefs.
1546 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1547 if (MinSplatBits < 32) {
1549 // If the top 16-bits are different than the lower 16-bits, ignoring
1550 // undefs, we have an i32 splat.
1551 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1552 if (MinSplatBits < 16) {
1553 // If the top 8-bits are different than the lower 8-bits, ignoring
1554 // undefs, we have an i16 splat.
1555 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1556 // Otherwise, we have an 8-bit splat.
1557 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1558 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1564 SplatUndef = Undef16;
1571 SplatUndef = Undef32;
1577 SplatBits = Bits128[0];
1578 SplatUndef = Undef128[0];
1584 return false; // Can't be a splat if two pieces don't match.
1587 // If this is a case we can't handle, return null and let the default
1588 // expansion code take care of it. If we CAN select this case, and if it
1589 // selects to a single instruction, return Op. Otherwise, if we can codegen
1590 // this case more efficiently than a constant pool load, lower it to the
1591 // sequence of ops that should be used.
1592 static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1593 MVT VT = Op.getValueType();
1594 // If this is a vector of constants or undefs, get the bits. A bit in
1595 // UndefBits is set if the corresponding element of the vector is an
1596   // ISD::UNDEF value. For undefs, the corresponding VectorBits values are zero.
1598 uint64_t VectorBits[2];
1599 uint64_t UndefBits[2];
1600 uint64_t SplatBits, SplatUndef;
1602 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1603 || !isConstantSplat(VectorBits, UndefBits,
1604 VT.getVectorElementType().getSizeInBits(),
1605 SplatBits, SplatUndef, SplatSize))
1606 return SDValue(); // Not a constant vector, not a splat.
1608 switch (VT.getSimpleVT()) {
1611 uint32_t Value32 = SplatBits;
1612 assert(SplatSize == 4
1613 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1614 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1615 SDValue T = DAG.getConstant(Value32, MVT::i32);
1616 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1617 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1621 uint64_t f64val = SplatBits;
1622 assert(SplatSize == 8
1623 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1624 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1625 SDValue T = DAG.getConstant(f64val, MVT::i64);
1626 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1627 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1631 // 8-bit constants have to be expanded to 16-bits
1632 unsigned short Value16 = SplatBits | (SplatBits << 8);
1634 for (int i = 0; i < 8; ++i)
1635 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1636 return DAG.getNode(ISD::BIT_CONVERT, VT,
1637 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1640 unsigned short Value16;
1642 Value16 = (unsigned short) (SplatBits & 0xffff);
1644 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1645 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1647 for (int i = 0; i < 8; ++i) Ops[i] = T;
1648 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1651 unsigned int Value = SplatBits;
1652 SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1653 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1656 uint64_t val = SplatBits;
1657 uint32_t upper = uint32_t(val >> 32);
1658 uint32_t lower = uint32_t(val);
1660 if (upper == lower) {
1661       // Magic constant that can be matched by IL, ILA, et al.
1662 SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1663 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1667 SmallVector<SDValue, 16> ShufBytes;
1669 bool upper_special, lower_special;
1671 // NOTE: This code creates common-case shuffle masks that can be easily
1672 // detected as common expressions. It is not attempting to create highly
1673 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1675 // Detect if the upper or lower half is a special shuffle mask pattern:
1676 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1677 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1679 // Create lower vector if not a special pattern
1680 if (!lower_special) {
1681 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1682 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1683 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1684 LO32C, LO32C, LO32C, LO32C));
1687 // Create upper vector if not a special pattern
1688 if (!upper_special) {
1689 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1690 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1691 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1692 HI32C, HI32C, HI32C, HI32C));
1695 // If either upper or lower are special, then the two input operands are
1696 // the same (basically, one of them is a "don't care")
1701 if (lower_special && upper_special) {
1702 // Unhappy situation... both upper and lower are special, so punt with
1703 // a target constant:
1704 SDValue Zero = DAG.getConstant(0, MVT::i32);
1705 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1709 for (int i = 0; i < 4; ++i) {
1711 for (int j = 0; j < 4; ++j) {
1713 bool process_upper, process_lower;
1715 process_upper = (upper_special && (i & 1) == 0);
1716 process_lower = (lower_special && (i & 1) == 1);
1718 if (process_upper || process_lower) {
1719 if ((process_upper && upper == 0)
1720 || (process_lower && lower == 0))
1722 else if ((process_upper && upper == 0xffffffff)
1723 || (process_lower && lower == 0xffffffff))
1725 else if ((process_upper && upper == 0x80000000)
1726 || (process_lower && lower == 0x80000000))
1727 val |= (j == 0 ? 0xe0 : 0x80);
1729 val |= i * 4 + j + ((i & 1) * 16);
1732 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1735 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1736 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1737 &ShufBytes[0], ShufBytes.size()));
1745 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1746 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1747 /// permutation vector, V3, is monotonically increasing with one "exception"
1748 /// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1749 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1750 /// In either case, the net result is going to eventually invoke SHUFB to
1751 /// permute/shuffle the bytes from V1 and V2.
1753 /// INSERT_MASK is eventually selected as one of the C*D instructions, which
1754 /// generate the control word for byte/halfword/word insertion. This takes care of a single
1755 /// element move from V2 into V1.
1757 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
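///
/// A qualifying mask for v4i32 would be, e.g., <0, 1, 5, 3>: every element
/// keeps its own slot except slot 2, which takes element 5 (element 1 of V2),
/// so a single insertion control word suffices.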
1758 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1759 SDValue V1 = Op.getOperand(0);
1760 SDValue V2 = Op.getOperand(1);
1761 SDValue PermMask = Op.getOperand(2);
1763 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1765 // If we have a single element being moved from V1 to V2, this can be handled
1766 // using the C*[DX] compute mask instructions, but the vector elements have
1767 // to be monotonically increasing with one exception element.
1768 MVT EltVT = V1.getValueType().getVectorElementType();
1769 unsigned EltsFromV2 = 0;
1771 unsigned V2EltIdx0 = 0;
1772 unsigned CurrElt = 0;
1773 bool monotonic = true;
1774 if (EltVT == MVT::i8)
1776 else if (EltVT == MVT::i16)
1778 else if (EltVT == MVT::i32)
1781 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1783 for (unsigned i = 0, e = PermMask.getNumOperands();
1784 EltsFromV2 <= 1 && monotonic && i != e;
1787 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1790 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1792 if (SrcElt >= V2EltIdx0) {
1794 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1795 } else if (CurrElt != SrcElt) {
1802 if (EltsFromV2 == 1 && monotonic) {
1803 // Compute mask and shuffle
1804 MachineFunction &MF = DAG.getMachineFunction();
1805 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1806 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1807 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1808 // Initialize temporary register to 0
1809 SDValue InitTempReg =
1810 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1811 // Copy register's contents as index in INSERT_MASK:
1812 SDValue ShufMaskOp =
1813 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1814 DAG.getTargetConstant(V2Elt, MVT::i32),
1815 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1816 // Use shuffle mask in SHUFB synthetic instruction:
1817 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1819 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1820 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
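// For v4i32, for instance, each mask element expands to four byte selectors,
// so a source element index of 5 becomes byte indices 20..23 in the v16i8
// shuffle mask handed to SHUFB.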
1822 SmallVector<SDValue, 16> ResultMask;
1823 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1825 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1828 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1830 for (unsigned j = 0; j < BytesPerElement; ++j) {
1831 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1836 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1837 &ResultMask[0], ResultMask.size());
1838 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1842 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1843 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1845 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1846 // For a constant, build the appropriate constant vector, which will
1847 // eventually simplify to a vector register load.
1849 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1850 SmallVector<SDValue, 16> ConstVecValues;
1854 // Create a constant vector:
1855 switch (Op.getValueType().getSimpleVT()) {
1856 default: assert(0 && "Unexpected constant value type in "
1857 "LowerSCALAR_TO_VECTOR");
1858 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1859 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1860 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1861 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1862 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1863 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1866 SDValue CValue = DAG.getConstant(CN->getValue(), VT);
1867 for (size_t j = 0; j < n_copies; ++j)
1868 ConstVecValues.push_back(CValue);
1870 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1871 &ConstVecValues[0], ConstVecValues.size());
1873 // Otherwise, copy the value from one register to another:
1874 switch (Op0.getValueType().getSimpleVT()) {
1875 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1882 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1889 static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
1890 switch (Op.getValueType().getSimpleVT()) {
1892 cerr << "CellSPU: Unknown vector multiplication, got "
1893 << Op.getValueType().getMVTString()
1899 SDValue rA = Op.getOperand(0);
1900 SDValue rB = Op.getOperand(1);
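// With a = (aH << 16) + aL and b = (bH << 16) + bL per element,
// a*b mod 2^32 = aL*bL + ((aH*bL + bH*aL) << 16): MPYU supplies the low
// 16x16 product and each MPYH supplies one cross product, already shifted
// left by 16 bits.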
1901 SDValue HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1902 SDValue HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1903 SDValue LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1904 SDValue Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1906 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1910 // Multiply two v8i16 vectors (pipeline friendly version):
1911 // a) multiply lower halves, mask off the upper 16 bits of the 32-bit product
1912 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1913 // c) Use SELB to select upper and lower halves from the intermediate results
1915 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1916 // dual-issue. This code does manage to do this, even if it's a bit convoluted.
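// Roughly: fsmbi 0xcccc expands to the byte mask ff ff 00 00 repeated per
// 32-bit word, so the final SELB can merge the (shifted) upper-half products
// with the lower-half products halfword by halfword.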
1919 MachineFunction &MF = DAG.getMachineFunction();
1920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1921 SDValue Chain = Op.getOperand(0);
1922 SDValue rA = Op.getOperand(0);
1923 SDValue rB = Op.getOperand(1);
1924 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1925 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1928 DAG.getCopyToReg(Chain, FSMBIreg,
1929 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1930 DAG.getConstant(0xcccc, MVT::i16)));
1933 DAG.getCopyToReg(FSMBOp, HiProdReg,
1934 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1936 SDValue HHProd_v4i32 =
1937 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1938 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1940 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1941 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1942 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1943 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1945 DAG.getConstant(16, MVT::i16))),
1946 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1949 // This M00sE is N@stI! (apologies to Monty Python)
1951 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1952 // is to break it all apart, sign extend, and reassemble the various
1953 // intermediate products.
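// Roughly: the products are formed on the v8i16 reinterpretation of the
// operands (each 16-bit lane carrying two byte lanes), shifted into position
// with VEC_SHL/VEC_SRA, and merged back together with SELB byte masks and a
// final OR.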
1955 SDValue rA = Op.getOperand(0);
1956 SDValue rB = Op.getOperand(1);
1957 SDValue c8 = DAG.getConstant(8, MVT::i32);
1958 SDValue c16 = DAG.getConstant(16, MVT::i32);
1961 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1962 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1963 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1965 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1967 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1970 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1971 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1973 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1974 DAG.getConstant(0x2222, MVT::i16));
1976 SDValue LoProdParts =
1977 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1978 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1979 LLProd, LHProd, FSMBmask));
1981 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1984 DAG.getNode(ISD::AND, MVT::v4i32,
1986 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1987 LoProdMask, LoProdMask,
1988 LoProdMask, LoProdMask));
1991 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1992 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1995 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1996 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
1999 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2000 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2001 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2004 DAG.getNode(SPUISD::MPY, MVT::v8i16,
2005 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2006 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2007 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2008 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2011 DAG.getNode(SPUISD::SELB, MVT::v8i16,
2013 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2017 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2019 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2020 DAG.getNode(ISD::OR, MVT::v4i32,
2028 static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
2029 MachineFunction &MF = DAG.getMachineFunction();
2030 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2032 SDValue A = Op.getOperand(0);
2033 SDValue B = Op.getOperand(1);
2034 MVT VT = Op.getValueType();
2036 unsigned VRegBR, VRegC;
2038 if (VT == MVT::f32) {
2039 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2040 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2042 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2043 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2045 // TODO: make sure we're feeding FPInterp the right arguments
2046 // Right now: fi B, frest(B)
2049 // (Floating Interpolate (FP Reciprocal Estimate B))
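// The sequence below amounts to one Newton-Raphson style refinement of the
// estimated quotient: with y ~= 1/B from FPRecipEst/FPInterp and q0 = A * y,
// the value returned is q0 + y * (A - B * q0).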
2051 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2052 DAG.getNode(SPUISD::FPInterp, VT, B,
2053 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2055 // Computes A * BRcpl and stores in a temporary register
2057 DAG.getCopyToReg(BRcpl, VRegC,
2058 DAG.getNode(ISD::FMUL, VT, A,
2059 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2060 // What does the Chain variable do? It's magic!
2061 // TODO: set Chain = Op(0).getEntryNode()
2063 return DAG.getNode(ISD::FADD, VT,
2064 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2065 DAG.getNode(ISD::FMUL, VT,
2066 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2067 DAG.getNode(ISD::FSUB, VT, A,
2068 DAG.getNode(ISD::FMUL, VT, B,
2069 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2072 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2073 MVT VT = Op.getValueType();
2074 SDValue N = Op.getOperand(0);
2075 SDValue Elt = Op.getOperand(1);
2076 SDValue ShufMask[16];
2077 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2079 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2081 int EltNo = (int) C->getValue();
2084 if (VT == MVT::i8 && EltNo >= 16)
2085 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2086 else if (VT == MVT::i16 && EltNo >= 8)
2087 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2088 else if (VT == MVT::i32 && EltNo >= 4)
2089 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2090 else if (VT == MVT::i64 && EltNo >= 2)
2091 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2093 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2094 // i32 and i64: Element 0 is the preferred slot
2095 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2098 // Need to generate shuffle mask and extract:
2099 int prefslot_begin = -1, prefslot_end = -1;
2100 int elt_byte = EltNo * VT.getSizeInBits() / 8;
2102 switch (VT.getSimpleVT()) {
2104 assert(false && "Invalid value type!");
2106 prefslot_begin = prefslot_end = 3;
2110 prefslot_begin = 2; prefslot_end = 3;
2114 prefslot_begin = 0; prefslot_end = 3;
2118 prefslot_begin = 0; prefslot_end = 7;
2123 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2124 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2126 for (int i = 0; i < 16; ++i) {
2127 // zero fill the upper part of the preferred slot, don't care about the rest
2129 unsigned int mask_val;
2131 if (i <= prefslot_end) {
2133 ((i < prefslot_begin)
2135 : elt_byte + (i - prefslot_begin));
2137 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2139 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2142 SDValue ShufMaskVec =
2143 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2145 sizeof(ShufMask) / sizeof(ShufMask[0]));
2147 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2148 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2149 N, N, ShufMaskVec));
2153 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2154 SDValue VecOp = Op.getOperand(0);
2155 SDValue ValOp = Op.getOperand(1);
2156 SDValue IdxOp = Op.getOperand(2);
2157 MVT VT = Op.getValueType();
2159 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2160 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2162 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2163 // Use $2 because it's always 16-byte aligned and it's available:
2164 SDValue PtrBase = DAG.getRegister(SPU::R2, PtrVT);
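// INSERT_MASK is eventually selected to one of the C*D (generate controls
// for insertion) instructions, which derive the shufb control word from the
// low bits of an address; adding the element index (scaled to bytes) to the
// always-aligned $2 makes those low bits encode the insertion offset.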
2167 DAG.getNode(SPUISD::SHUFB, VT,
2168 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2170 DAG.getNode(SPUISD::INSERT_MASK, VT,
2171 DAG.getNode(ISD::ADD, PtrVT,
2173 DAG.getConstant(CN->getValue(),
2179 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2181 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2183 assert(Op.getValueType() == MVT::i8);
2186 assert(0 && "Unhandled i8 math operator");
2190 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2192 SDValue N1 = Op.getOperand(1);
2193 N0 = (N0.getOpcode() != ISD::Constant
2194 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2195 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2196 N1 = (N1.getOpcode() != ISD::Constant
2197 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2198 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2199 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2200 DAG.getNode(Opc, MVT::i16, N0, N1));
2204 SDValue N1 = Op.getOperand(1);
2206 N0 = (N0.getOpcode() != ISD::Constant
2207 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2208 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2209 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2210 N1 = (N1.getOpcode() != ISD::Constant
2211 ? DAG.getNode(N1Opc, MVT::i16, N1)
2212 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2214 DAG.getNode(ISD::OR, MVT::i16, N0,
2215 DAG.getNode(ISD::SHL, MVT::i16,
2216 N0, DAG.getConstant(8, MVT::i16)));
2217 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2218 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2222 SDValue N1 = Op.getOperand(1);
2224 N0 = (N0.getOpcode() != ISD::Constant
2225 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2226 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2227 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2228 N1 = (N1.getOpcode() != ISD::Constant
2229 ? DAG.getNode(N1Opc, MVT::i16, N1)
2230 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2231 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2232 DAG.getNode(Opc, MVT::i16, N0, N1));
2235 SDValue N1 = Op.getOperand(1);
2237 N0 = (N0.getOpcode() != ISD::Constant
2238 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2239 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2240 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2241 N1 = (N1.getOpcode() != ISD::Constant
2242 ? DAG.getNode(N1Opc, MVT::i16, N1)
2243 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2244 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2245 DAG.getNode(Opc, MVT::i16, N0, N1));
2248 SDValue N1 = Op.getOperand(1);
2250 N0 = (N0.getOpcode() != ISD::Constant
2251 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2252 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2253 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2254 N1 = (N1.getOpcode() != ISD::Constant
2255 ? DAG.getNode(N1Opc, MVT::i16, N1)
2256 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2257 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2258 DAG.getNode(Opc, MVT::i16, N0, N1));
2266 static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
2268 MVT VT = Op.getValueType();
2269 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2271 SDValue Op0 = Op.getOperand(0);
2274 case ISD::ZERO_EXTEND:
2275 case ISD::SIGN_EXTEND:
2276 case ISD::ANY_EXTEND: {
2277 MVT Op0VT = Op0.getValueType();
2278 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2280 assert(Op0VT == MVT::i32
2281 && "CellSPU: Zero/sign extending something other than i32");
2282 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2284 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2285 ? SPUISD::ROTBYTES_RIGHT_S
2286 : SPUISD::ROTQUAD_RZ_BYTES);
2287 SDValue PromoteScalar =
2288 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2290 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2291 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2292 DAG.getNode(NewOpc, Op0VecVT,
2294 DAG.getConstant(4, MVT::i32))));
2298 // Turn operands into vectors to satisfy type checking (shufb works on
2301 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2303 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2304 SmallVector<SDValue, 16> ShufBytes;
2306 // Create the shuffle mask for "rotating" the carry up one register slot
2307 // once the carry is generated.
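// Selector words 0x04050607 and 0x0c0d0e0f move the carry computed in the
// low-word slot of each i64 lane up into the high-word slot, while the
// 0x80808080 words fill the vacated slots with zeros.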
2308 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2309 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2310 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2311 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2314 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2315 SDValue ShiftedCarry =
2316 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2318 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2319 &ShufBytes[0], ShufBytes.size()));
2321 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2322 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2323 Op0, Op1, ShiftedCarry));
2327 // Turn operands into vectors to satisfy type checking (shufb works on
2330 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2332 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2333 SmallVector<SDValue, 16> ShufBytes;
2335 // Create the shuffle mask for "rotating" the borrow up one register slot
2336 // once the borrow is generated.
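// Same rotation as in the ADD case, except the filler selector 0xc0c0c0c0
// expands to 0xff bytes rather than zeros, which presumably matches the
// borrow convention SUB_EXTENDED expects in the low-word slots.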
2337 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2338 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2339 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2340 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2343 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2344 SDValue ShiftedBorrow =
2345 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2346 BorrowGen, BorrowGen,
2347 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2348 &ShufBytes[0], ShufBytes.size()));
2350 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2351 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2352 Op0, Op1, ShiftedBorrow));
2356 SDValue ShiftAmt = Op.getOperand(1);
2357 MVT ShiftAmtVT = ShiftAmt.getValueType();
2358 SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2360 DAG.getNode(SPUISD::SELB, VecVT,
2362 DAG.getConstant(0, VecVT),
2363 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2364 DAG.getConstant(0xff00ULL, MVT::i16)));
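// The variable shift amount is split the way the quadword shifter wants it:
// a byte-granular shift by (amount >> 3) via SHLQUAD_L_BYTES, followed by a
// bit-granular shift by (amount & 7) via SHLQUAD_L_BITS.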
2365 SDValue ShiftAmtBytes =
2366 DAG.getNode(ISD::SRL, ShiftAmtVT,
2368 DAG.getConstant(3, ShiftAmtVT));
2369 SDValue ShiftAmtBits =
2370 DAG.getNode(ISD::AND, ShiftAmtVT,
2372 DAG.getConstant(7, ShiftAmtVT));
2374 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2375 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2376 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2377 MaskLower, ShiftAmtBytes),
2382 MVT VT = Op.getValueType();
2383 SDValue ShiftAmt = Op.getOperand(1);
2384 MVT ShiftAmtVT = ShiftAmt.getValueType();
2385 SDValue ShiftAmtBytes =
2386 DAG.getNode(ISD::SRL, ShiftAmtVT,
2388 DAG.getConstant(3, ShiftAmtVT));
2389 SDValue ShiftAmtBits =
2390 DAG.getNode(ISD::AND, ShiftAmtVT,
2392 DAG.getConstant(7, ShiftAmtVT));
2394 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2395 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2396 Op0, ShiftAmtBytes),
2401 // Promote Op0 to vector
2403 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2404 SDValue ShiftAmt = Op.getOperand(1);
2405 MVT ShiftVT = ShiftAmt.getValueType();
2407 // Negate variable shift amounts
2408 if (!isa<ConstantSDNode>(ShiftAmt)) {
2409 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2410 DAG.getConstant(0, ShiftVT), ShiftAmt);
2413 SDValue UpperHalfSign =
2414 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2415 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2416 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2417 Op0, DAG.getConstant(31, MVT::i32))));
2418 SDValue UpperHalfSignMask =
2419 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2420 SDValue UpperLowerMask =
2421 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2422 DAG.getConstant(0xff00, MVT::i16));
2423 SDValue UpperLowerSelect =
2424 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2425 UpperHalfSignMask, Op0, UpperLowerMask);
2426 SDValue RotateLeftBytes =
2427 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2428 UpperLowerSelect, ShiftAmt);
2429 SDValue RotateLeftBits =
2430 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2431 RotateLeftBytes, ShiftAmt);
2433 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2441 //! Lower byte immediate operations for v16i8 vectors:
2443 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2446 MVT VT = Op.getValueType();
2448 ConstVec = Op.getOperand(0);
2449 Arg = Op.getOperand(1);
2450 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2451 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2452 ConstVec = ConstVec.getOperand(0);
2454 ConstVec = Op.getOperand(1);
2455 Arg = Op.getOperand(0);
2456 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2457 ConstVec = ConstVec.getOperand(0);
2462 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2463 uint64_t VectorBits[2];
2464 uint64_t UndefBits[2];
2465 uint64_t SplatBits, SplatUndef;
2468 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2469 && isConstantSplat(VectorBits, UndefBits,
2470 VT.getVectorElementType().getSizeInBits(),
2471 SplatBits, SplatUndef, SplatSize)) {
2473 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2474 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2476 // Turn the BUILD_VECTOR into a set of target constants:
2477 for (size_t i = 0; i < tcVecSize; ++i)
2480 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2481 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2484 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2485 // lowered. Return the operation, rather than a null SDValue.
2489 //! Lower i32 multiplication
2490 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG, MVT VT,
2492 switch (VT.getSimpleVT()) {
2494 cerr << "CellSPU: Unknown LowerMUL value type, got "
2495 << Op.getValueType().getMVTString()
2501 SDValue rA = Op.getOperand(0);
2502 SDValue rB = Op.getOperand(1);
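// Same MPYH/MPYH/MPYU decomposition as the v4i32 multiply above, applied to
// a single i32: the cross products arrive pre-shifted by 16 bits and the low
// 16x16 product comes from MPYU.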
2504 return DAG.getNode(ISD::ADD, MVT::i32,
2505 DAG.getNode(ISD::ADD, MVT::i32,
2506 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2507 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2508 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2515 //! Custom lowering for CTPOP (count population)
2517 Custom lowering code that counts the number of ones in the input
2518 operand. SPU has such an instruction, but it counts the number of
2519 ones per byte, which then have to be accumulated.
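As a worked example for the i32 path: for N = 0x01020304, CNTB yields the
per-byte counts 0x01010201; adding (x >> 16) gives 0x01010302, adding
(x >> 8) to that gives 0x01020405, and masking with 0xff leaves 5, the
total population count.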
2521 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2522 MVT VT = Op.getValueType();
2523 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2525 switch (VT.getSimpleVT()) {
2527 assert(false && "Invalid value type!");
2529 SDValue N = Op.getOperand(0);
2530 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2532 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2533 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2535 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2539 MachineFunction &MF = DAG.getMachineFunction();
2540 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2542 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2544 SDValue N = Op.getOperand(0);
2545 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2546 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2547 SDValue Shift1 = DAG.getConstant(8, MVT::i16);
2549 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2550 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2552 // CNTB_result becomes the chain to which the virtual register
2553 // CNTB_reg becomes associated:
2554 SDValue CNTB_result =
2555 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2557 SDValue CNTB_rescopy =
2558 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2560 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2562 return DAG.getNode(ISD::AND, MVT::i16,
2563 DAG.getNode(ISD::ADD, MVT::i16,
2564 DAG.getNode(ISD::SRL, MVT::i16,
2571 MachineFunction &MF = DAG.getMachineFunction();
2572 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2574 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2575 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2577 SDValue N = Op.getOperand(0);
2578 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2579 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2580 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2581 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2583 SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2584 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2586 // CNTB_result becomes the chain to which all of the virtual registers
2587 // CNTB_reg, SUM1_reg become associated:
2588 SDValue CNTB_result =
2589 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2591 SDValue CNTB_rescopy =
2592 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2595 DAG.getNode(ISD::SRL, MVT::i32,
2596 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2599 DAG.getNode(ISD::ADD, MVT::i32,
2600 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2602 SDValue Sum1_rescopy =
2603 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2606 DAG.getNode(ISD::SRL, MVT::i32,
2607 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2610 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2611 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2613 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2623 /// LowerOperation - Provide custom lowering hooks for some operations.
2626 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2628 unsigned Opc = (unsigned) Op.getOpcode();
2629 MVT VT = Op.getValueType();
2633 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2634 cerr << "Op.getOpcode() = " << Opc << "\n";
2635 cerr << "*Op.getNode():\n";
2636 Op.getNode()->dump();
2642 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2644 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2645 case ISD::ConstantPool:
2646 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2647 case ISD::GlobalAddress:
2648 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2649 case ISD::JumpTable:
2650 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2652 return LowerConstant(Op, DAG);
2653 case ISD::ConstantFP:
2654 return LowerConstantFP(Op, DAG);
2656 return LowerBRCOND(Op, DAG);
2657 case ISD::FORMAL_ARGUMENTS:
2658 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2660 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2662 return LowerRET(Op, DAG, getTargetMachine());
2665 // i8, i64 math ops:
2666 case ISD::ZERO_EXTEND:
2667 case ISD::SIGN_EXTEND:
2668 case ISD::ANY_EXTEND:
2677 return LowerI8Math(Op, DAG, Opc);
2678 else if (VT == MVT::i64)
2679 return LowerI64Math(Op, DAG, Opc);
2683 // Vector-related lowering.
2684 case ISD::BUILD_VECTOR:
2685 return LowerBUILD_VECTOR(Op, DAG);
2686 case ISD::SCALAR_TO_VECTOR:
2687 return LowerSCALAR_TO_VECTOR(Op, DAG);
2688 case ISD::VECTOR_SHUFFLE:
2689 return LowerVECTOR_SHUFFLE(Op, DAG);
2690 case ISD::EXTRACT_VECTOR_ELT:
2691 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2692 case ISD::INSERT_VECTOR_ELT:
2693 return LowerINSERT_VECTOR_ELT(Op, DAG);
2695 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2699 return LowerByteImmed(Op, DAG);
2701 // Vector and i8 multiply:
2704 return LowerVectorMUL(Op, DAG);
2705 else if (VT == MVT::i8)
2706 return LowerI8Math(Op, DAG, Opc);
2708 return LowerMUL(Op, DAG, VT, Opc);
2711 if (VT == MVT::f32 || VT == MVT::v4f32)
2712 return LowerFDIVf32(Op, DAG);
2713 // else if (Op.getValueType() == MVT::f64)
2714 // return LowerFDIVf64(Op, DAG);
2716 assert(0 && "Calling FDIV on unsupported MVT");
2719 return LowerCTPOP(Op, DAG);
2725 //===----------------------------------------------------------------------===//
2726 // Target Optimization Hooks
2727 //===----------------------------------------------------------------------===//
2730 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2733 TargetMachine &TM = getTargetMachine();
2735 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2736 SelectionDAG &DAG = DCI.DAG;
2737 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2738 SDValue Result; // Initially, NULL result
2740 switch (N->getOpcode()) {
2743 SDValue Op1 = N->getOperand(1);
2745 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2746 SDValue Op01 = Op0.getOperand(1);
2747 if (Op01.getOpcode() == ISD::Constant
2748 || Op01.getOpcode() == ISD::TargetConstant) {
2749 // (add <const>, (SPUindirect <arg>, <const>)) ->
2750 // (SPUindirect <arg>, <const + const>)
2751 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2752 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2753 SDValue combinedConst =
2754 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2755 Op0.getValueType());
2757 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2758 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2759 DEBUG(cerr << "With: (SPUindirect <arg>, "
2760 << CN0->getValue() + CN1->getValue() << ")\n");
2761 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2762 Op0.getOperand(0), combinedConst);
2764 } else if (isa<ConstantSDNode>(Op0)
2765 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2766 SDValue Op11 = Op1.getOperand(1);
2767 if (Op11.getOpcode() == ISD::Constant
2768 || Op11.getOpcode() == ISD::TargetConstant) {
2769 // (add (SPUindirect <arg>, <const>), <const>) ->
2770 // (SPUindirect <arg>, <const + const>)
2771 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2772 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2773 SDValue combinedConst =
2774 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2775 Op0.getValueType());
2777 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2778 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2779 DEBUG(cerr << "With: (SPUindirect <arg>, "
2780 << CN0->getValue() + CN1->getValue() << ")\n");
2782 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2783 Op1.getOperand(0), combinedConst);
2788 case ISD::SIGN_EXTEND:
2789 case ISD::ZERO_EXTEND:
2790 case ISD::ANY_EXTEND: {
2791 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2792 N->getValueType(0) == Op0.getValueType()) {
2793 // (any_extend (SPUextract_elt0 <arg>)) ->
2794 // (SPUextract_elt0 <arg>)
2795 // Types must match, however...
2796 DEBUG(cerr << "Replace: ");
2797 DEBUG(N->dump(&DAG));
2798 DEBUG(cerr << "\nWith: ");
2799 DEBUG(Op0.getNode()->dump(&DAG));
2800 DEBUG(cerr << "\n");
2806 case SPUISD::IndirectAddr: {
2807 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2808 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2809 if (CN->getValue() == 0) {
2810 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2811 // (SPUaform <addr>, 0)
2813 DEBUG(cerr << "Replace: ");
2814 DEBUG(N->dump(&DAG));
2815 DEBUG(cerr << "\nWith: ");
2816 DEBUG(Op0.getNode()->dump(&DAG));
2817 DEBUG(cerr << "\n");
2824 case SPUISD::SHLQUAD_L_BITS:
2825 case SPUISD::SHLQUAD_L_BYTES:
2826 case SPUISD::VEC_SHL:
2827 case SPUISD::VEC_SRL:
2828 case SPUISD::VEC_SRA:
2829 case SPUISD::ROTQUAD_RZ_BYTES:
2830 case SPUISD::ROTQUAD_RZ_BITS: {
2831 SDValue Op1 = N->getOperand(1);
2833 if (isa<ConstantSDNode>(Op1)) {
2834 // Kill degenerate vector shifts:
2835 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2837 if (CN->getValue() == 0) {
2843 case SPUISD::PROMOTE_SCALAR: {
2844 switch (Op0.getOpcode()) {
2847 case ISD::ANY_EXTEND:
2848 case ISD::ZERO_EXTEND:
2849 case ISD::SIGN_EXTEND: {
2850 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2852 // but only if the SPUpromote_scalar and <arg> types match.
2853 SDValue Op00 = Op0.getOperand(0);
2854 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2855 SDValue Op000 = Op00.getOperand(0);
2856 if (Op000.getValueType() == N->getValueType(0)) {
2862 case SPUISD::EXTRACT_ELT0: {
2863 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
2865 Result = Op0.getOperand(0);
2872 // Otherwise, return unchanged.
2874 if (Result.getNode()) {
2875 DEBUG(cerr << "\nReplace.SPU: ");
2876 DEBUG(N->dump(&DAG));
2877 DEBUG(cerr << "\nWith: ");
2878 DEBUG(Result.getNode()->dump(&DAG));
2879 DEBUG(cerr << "\n");
2886 //===----------------------------------------------------------------------===//
2887 // Inline Assembly Support
2888 //===----------------------------------------------------------------------===//
2890 /// getConstraintType - Given a constraint letter, return the type of
2891 /// constraint it is for this target.
2892 SPUTargetLowering::ConstraintType
2893 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2894 if (ConstraintLetter.size() == 1) {
2895 switch (ConstraintLetter[0]) {
2902 return C_RegisterClass;
2905 return TargetLowering::getConstraintType(ConstraintLetter);
2908 std::pair<unsigned, const TargetRegisterClass*>
2909 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2912 if (Constraint.size() == 1) {
2913 // GCC RS6000 Constraint Letters
2914 switch (Constraint[0]) {
2918 return std::make_pair(0U, SPU::R64CRegisterClass);
2919 return std::make_pair(0U, SPU::R32CRegisterClass);
2922 return std::make_pair(0U, SPU::R32FPRegisterClass);
2923 else if (VT == MVT::f64)
2924 return std::make_pair(0U, SPU::R64FPRegisterClass);
2927 return std::make_pair(0U, SPU::GPRCRegisterClass);
2931 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2934 //! Compute used/known bits for a SPU operand
2936 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2940 const SelectionDAG &DAG,
2941 unsigned Depth ) const {
2943 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2946 switch (Op.getOpcode()) {
2948 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2958 case SPUISD::PROMOTE_SCALAR: {
2959 SDValue Op0 = Op.getOperand(0);
2960 MVT Op0VT = Op0.getValueType();
2961 unsigned Op0VTBits = Op0VT.getSizeInBits();
2962 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2963 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2964 KnownOne |= APInt(Op0VTBits, InMask, false);
2968 case SPUISD::LDRESULT:
2969 case SPUISD::EXTRACT_ELT0:
2970 case SPUISD::EXTRACT_ELT0_CHAINED: {
2971 MVT OpVT = Op.getValueType();
2972 unsigned OpVTBits = OpVT.getSizeInBits();
2973 uint64_t InMask = OpVT.getIntegerVTBitMask();
2974 KnownZero |= APInt(OpVTBits, ~InMask, false);
2975 KnownOne |= APInt(OpVTBits, InMask, false);
2980 case EXTRACT_I1_ZEXT:
2981 case EXTRACT_I1_SEXT:
2982 case EXTRACT_I8_ZEXT:
2983 case EXTRACT_I8_SEXT:
2988 case SPUISD::SHLQUAD_L_BITS:
2989 case SPUISD::SHLQUAD_L_BYTES:
2990 case SPUISD::VEC_SHL:
2991 case SPUISD::VEC_SRL:
2992 case SPUISD::VEC_SRA:
2993 case SPUISD::VEC_ROTL:
2994 case SPUISD::VEC_ROTR:
2995 case SPUISD::ROTQUAD_RZ_BYTES:
2996 case SPUISD::ROTQUAD_RZ_BITS:
2997 case SPUISD::ROTBYTES_RIGHT_S:
2998 case SPUISD::ROTBYTES_LEFT:
2999 case SPUISD::ROTBYTES_LEFT_CHAINED:
3000 case SPUISD::SELECT_MASK:
3002 case SPUISD::FPInterp:
3003 case SPUISD::FPRecipEst:
3004 case SPUISD::SEXT32TO64:
3009 // LowerAsmOperandForConstraint
3011 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3012 char ConstraintLetter,
3013 std::vector<SDValue> &Ops,
3014 SelectionDAG &DAG) const {
3015 // Default, for the time being, to the base class handler
3016 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3019 /// isLegalAddressImmediate - Return true if the integer value can be used
3020 /// as the offset of the target addressing mode.
3021 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3022 // SPU's addresses are 256K:
3023 return (V > -(1 << 18) && V < (1 << 18) - 1);
3026 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {