1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/VectorExtras.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/CodeGen/CallingConvLower.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunction.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/SelectionDAG.h"
26 #include "llvm/Constants.h"
27 #include "llvm/Function.h"
28 #include "llvm/Intrinsics.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetOptions.h"
37 // Used in getTargetNodeName() below
// Opcode -> printable-name cache, populated lazily by getTargetNodeName().
39 std::map<unsigned, const char *> node_names;
41 //! MVT mapping to useful data for Cell SPU
// Per-value-type record; only the preferred-slot byte offset is visible
// here (other fields/initializers are elided in this view).
42 struct valtype_map_s {
44 const int prefslot_byte;
// Table of per-MVT entries searched by getValueTypeMapEntry().
47 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map (classic sizeof-array idiom).
58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
//! Look up the valtype_map entry for value type VT.
// Linear scan of the n_valtype_map entries; emits a diagnostic via cerr
// when no entry matches. NOTE(review): the loop exit/return and the
// error path's remainder are elided in this view.
60 const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61 const valtype_map_s *retval = 0;
63 for (size_t i = 0; i < n_valtype_map; ++i) {
64 if (valtype_map[i].valtype == VT) {
65 retval = valtype_map + i;
72 cerr << "getValueTypeMapEntry returns NULL for "
82 //! Predicate that returns true if operand is a memory target
84 \arg Op Operand to test
85 \return true if the operand is a memory target (i.e., global
86 address, external symbol, constant pool) or an A-form
// Returns true when Op's opcode is one of the address-producing node
// kinds below: global/TLS address, jump table, constant pool, external
// symbol, their Target* counterparts, or the SPU A-form address node.
89 bool isMemoryOperand(const SDOperand &Op)
91 const unsigned Opc = Op.getOpcode();
92 return (Opc == ISD::GlobalAddress
93 || Opc == ISD::GlobalTLSAddress
94 || Opc == ISD::JumpTable
95 || Opc == ISD::ConstantPool
96 || Opc == ISD::ExternalSymbol
97 || Opc == ISD::TargetGlobalAddress
98 || Opc == ISD::TargetGlobalTLSAddress
99 || Opc == ISD::TargetJumpTable
100 || Opc == ISD::TargetConstantPool
101 || Opc == ISD::TargetExternalSymbol
102 || Opc == SPUISD::AFormAddr);
105 //! Predicate that returns true if the operand is an indirect target
// True for a plain register operand or the SPU load-result node; both
// represent an address held in a register rather than a symbolic target.
106 bool isIndirectOperand(const SDOperand &Op)
108 const unsigned Opc = Op.getOpcode();
109 return (Opc == ISD::Register
110 || Opc == SPUISD::LDRESULT);
//! Construct the SPU target-lowering object.
// Registers the scalar (i8..i128, f32/f64) and vector register classes,
// then declares, per ISD operation and value type, whether the operation
// is Legal, Custom-lowered, Promoted, or Expanded for the Cell SPU.
// Finishes by registering target DAG-combine hooks and computing derived
// register properties.
114 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
115 : TargetLowering(TM),
118 // Fold away setcc operations if possible.
121 // Use _setjmp/_longjmp instead of setjmp/longjmp.
122 setUseUnderscoreSetJmp(true);
123 setUseUnderscoreLongJmp(true);
125 // Set up the SPU's register classes:
126 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
127 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
128 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
129 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
130 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
131 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
132 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
134 // SPU has no sign or zero extended loads for i1, i8, i16:
135 setLoadXAction(ISD::EXTLOAD, MVT::i1, Promote);
136 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
137 setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
138 setTruncStoreAction(MVT::i8, MVT::i1, Custom);
139 setTruncStoreAction(MVT::i16, MVT::i1, Custom);
140 setTruncStoreAction(MVT::i32, MVT::i1, Custom);
141 setTruncStoreAction(MVT::i64, MVT::i1, Custom);
142 setTruncStoreAction(MVT::i128, MVT::i1, Custom);
144 setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
145 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
146 setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
147 setTruncStoreAction(MVT::i8 , MVT::i8, Custom);
148 setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
149 setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
150 setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
151 setTruncStoreAction(MVT::i128, MVT::i8, Custom);
153 setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
154 setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
155 setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);
157 // SPU constant load actions are custom lowered:
158 setOperationAction(ISD::Constant, MVT::i64, Custom);
159 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
160 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
162 // SPU's loads and stores have to be custom lowered:
163 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
165 MVT VT = (MVT::SimpleValueType)sctype;
167 setOperationAction(ISD::LOAD, VT, Custom);
168 setOperationAction(ISD::STORE, VT, Custom);
171 // Custom lower BRCOND for i1, i8 to "promote" the result to
172 // i32 and i16, respectively.
173 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
175 // Expand the jumptable branches
176 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
177 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
178 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
180 // SPU has no intrinsics for these particular operations:
181 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
183 // SPU has no SREM/UREM instructions (comment previously said PowerPC --
183 // a copy/paste from the PPC backend; the actions below are for SPU)
184 setOperationAction(ISD::SREM, MVT::i32, Expand);
185 setOperationAction(ISD::UREM, MVT::i32, Expand);
186 setOperationAction(ISD::SREM, MVT::i64, Expand);
187 setOperationAction(ISD::UREM, MVT::i64, Expand);
189 // We don't support sin/cos/sqrt/fmod
190 setOperationAction(ISD::FSIN , MVT::f64, Expand);
191 setOperationAction(ISD::FCOS , MVT::f64, Expand);
192 setOperationAction(ISD::FREM , MVT::f64, Expand);
193 setOperationAction(ISD::FSIN , MVT::f32, Expand);
194 setOperationAction(ISD::FCOS , MVT::f32, Expand);
195 setOperationAction(ISD::FREM , MVT::f32, Expand);
197 // No hardware square root on SPU: expand FSQRT for both FP types.
198 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
199 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
201 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
202 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
204 // SPU can do rotate right and left, so legalize it... but customize for i8
205 // because instructions don't exist.
206 setOperationAction(ISD::ROTR, MVT::i32, Legal);
207 setOperationAction(ISD::ROTR, MVT::i16, Legal);
208 setOperationAction(ISD::ROTR, MVT::i8, Custom);
209 setOperationAction(ISD::ROTL, MVT::i32, Legal);
210 setOperationAction(ISD::ROTL, MVT::i16, Legal);
211 setOperationAction(ISD::ROTL, MVT::i8, Custom);
212 // SPU has no native version of shift left/right for i8
213 setOperationAction(ISD::SHL, MVT::i8, Custom);
214 setOperationAction(ISD::SRL, MVT::i8, Custom);
215 setOperationAction(ISD::SRA, MVT::i8, Custom);
216 // And SPU needs custom lowering for shift left/right for i64
217 setOperationAction(ISD::SHL, MVT::i64, Custom);
218 setOperationAction(ISD::SRL, MVT::i64, Custom);
219 setOperationAction(ISD::SRA, MVT::i64, Custom);
221 // Custom lower i32 multiplications
222 setOperationAction(ISD::MUL, MVT::i32, Custom);
224 // Need to custom handle (some) common i8, i64 math ops
225 setOperationAction(ISD::ADD, MVT::i64, Custom);
226 setOperationAction(ISD::SUB, MVT::i8, Custom);
227 setOperationAction(ISD::SUB, MVT::i64, Custom);
228 setOperationAction(ISD::MUL, MVT::i8, Custom);
230 // SPU does not have BSWAP. It does have i32 CTLZ support.
231 // CTPOP has to be custom lowered.
232 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
233 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
235 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
236 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
237 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
238 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
240 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
241 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
243 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
245 // SPU has a version of select that implements (a&~c)|(b&c), just like
246 // select ought to work:
247 setOperationAction(ISD::SELECT, MVT::i1, Promote);
248 setOperationAction(ISD::SELECT, MVT::i8, Legal);
249 setOperationAction(ISD::SELECT, MVT::i16, Legal);
250 setOperationAction(ISD::SELECT, MVT::i32, Legal);
251 setOperationAction(ISD::SELECT, MVT::i64, Expand);
253 setOperationAction(ISD::SETCC, MVT::i1, Promote);
254 setOperationAction(ISD::SETCC, MVT::i8, Legal);
255 setOperationAction(ISD::SETCC, MVT::i16, Legal);
256 setOperationAction(ISD::SETCC, MVT::i32, Legal);
257 setOperationAction(ISD::SETCC, MVT::i64, Expand);
259 // Zero extension and sign extension for i64 have to be
261 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
262 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
263 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
265 // SPU has a legal FP -> signed INT instruction
266 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
267 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
268 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
269 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
271 // FDIV on SPU requires custom lowering
272 setOperationAction(ISD::FDIV, MVT::f32, Custom);
// NOTE(review): f64 FDIV intentionally left commented out here.
273 //setOperationAction(ISD::FDIV, MVT::f64, Custom);
275 // SPU has [U|S]INT_TO_FP
276 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
277 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
278 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
279 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
280 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
281 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
282 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
285 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
286 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
287 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
288 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
290 // We cannot sextinreg(i1). Expand to shifts.
291 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
293 // Support label based line numbers.
294 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
295 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
297 // We want to legalize GlobalAddress and ConstantPool nodes into the
298 // appropriate instructions to materialize the address.
299 for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
301 MVT VT = (MVT::SimpleValueType)sctype;
303 setOperationAction(ISD::GlobalAddress, VT, Custom);
304 setOperationAction(ISD::ConstantPool, VT, Custom);
305 setOperationAction(ISD::JumpTable, VT, Custom);
308 // RET must be custom lowered, to meet ABI requirements
309 setOperationAction(ISD::RET, MVT::Other, Custom);
311 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
312 setOperationAction(ISD::VASTART , MVT::Other, Custom);
314 // Use the default implementation.
315 setOperationAction(ISD::VAARG , MVT::Other, Expand);
316 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
317 setOperationAction(ISD::VAEND , MVT::Other, Expand);
318 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
319 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
320 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
321 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
323 // Cell SPU has instructions for converting between i64 and fp.
324 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
325 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
327 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
328 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
330 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
331 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
333 // First set operation action for all vector types to expand. Then we
334 // will selectively turn on ones that can be effectively codegen'd.
335 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
336 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
337 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
338 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
339 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
340 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
342 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
343 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
344 MVT VT = (MVT::SimpleValueType)i;
346 // add/sub are legal for all supported vector VT's.
347 setOperationAction(ISD::ADD , VT, Legal);
348 setOperationAction(ISD::SUB , VT, Legal);
349 // mul has to be custom lowered.
350 setOperationAction(ISD::MUL , VT, Custom);
352 setOperationAction(ISD::AND , VT, Legal);
353 setOperationAction(ISD::OR , VT, Legal);
354 setOperationAction(ISD::XOR , VT, Legal);
355 setOperationAction(ISD::LOAD , VT, Legal);
356 setOperationAction(ISD::SELECT, VT, Legal);
357 setOperationAction(ISD::STORE, VT, Legal);
359 // These operations need to be expanded:
360 setOperationAction(ISD::SDIV, VT, Expand);
361 setOperationAction(ISD::SREM, VT, Expand);
362 setOperationAction(ISD::UDIV, VT, Expand);
363 setOperationAction(ISD::UREM, VT, Expand);
364 setOperationAction(ISD::FDIV, VT, Custom);
366 // Custom lower build_vector, constant pool spills, insert and
367 // extract vector elements:
368 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
369 setOperationAction(ISD::ConstantPool, VT, Custom);
370 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
371 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
372 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
373 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// v16i8 overrides: these logical/multiply ops need per-byte handling.
376 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
377 setOperationAction(ISD::AND, MVT::v16i8, Custom);
378 setOperationAction(ISD::OR, MVT::v16i8, Custom);
379 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
380 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
382 setShiftAmountType(MVT::i32);
383 setSetCCResultContents(ZeroOrOneSetCCResult);
385 setStackPointerRegisterToSaveRestore(SPU::R1);
387 // We have target-specific dag combine patterns for the following nodes:
388 setTargetDAGCombine(ISD::ADD);
389 setTargetDAGCombine(ISD::ZERO_EXTEND);
390 setTargetDAGCombine(ISD::SIGN_EXTEND);
391 setTargetDAGCombine(ISD::ANY_EXTEND);
393 computeRegisterProperties();
//! Return the printable name for a target-specific (SPUISD) opcode.
// Lazily populates the file-scope node_names map on first call (no
// synchronization is visible here), then looks up Opcode; returns 0 for
// opcodes with no registered name.
397 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
399 if (node_names.empty()) {
400 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
401 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
402 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
403 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
404 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
405 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
406 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
407 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
408 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
409 node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
410 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
411 node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
412 node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
413 node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
414 node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
415 node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
416 node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
417 node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
418 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
419 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
420 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
421 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
422 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
423 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
424 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
425 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
426 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
427 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
428 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
429 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
430 "SPUISD::ROTQUAD_RZ_BYTES";
431 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
432 "SPUISD::ROTQUAD_RZ_BITS";
433 node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
434 "SPUISD::ROTBYTES_RIGHT_S";
435 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
436 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
437 "SPUISD::ROTBYTES_LEFT_CHAINED";
438 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
439 "SPUISD::ROTBYTES_LEFT_BITS";
440 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
441 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
442 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
443 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
444 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
445 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
446 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
447 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
448 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
451 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
453 return ((i != node_names.end()) ? i->second : 0);
//! Return the value type produced by a SETCC on operand Op.
// NOTE(review): only the query of Op's value type is visible here; the
// remainder of the body (and its return) is elided in this view.
456 MVT SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
457 MVT VT = Op.getValueType();
464 //===----------------------------------------------------------------------===//
465 // Calling convention code:
466 //===----------------------------------------------------------------------===//
468 #include "SPUGenCallingConv.inc"
470 //===----------------------------------------------------------------------===//
471 // LowerOperation implementation
472 //===----------------------------------------------------------------------===//
474 /// Aligned load common code for CellSPU
476 \param[in] Op The SelectionDAG load or store operand
477 \param[in] DAG The selection DAG
478 \param[in] ST CellSPU subtarget information structure
479 \param[in,out] alignment Caller initializes this to the load or store node's
480 value from getAlignment(), may be updated while generating the aligned load
481 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
482 offset (divisible by 16, modulo 16 == 0)
483 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
484 offset of the preferred slot (modulo 16 != 0)
485 \param[in,out] VT Caller initializes this value type to the load or store
486 node's loaded or stored value type; may be updated if an i1-extended load or
488 \param[out] was16aligned true if the base pointer had 16-byte alignment,
489 otherwise false. Can help to determine if the chunk needs to be rotated.
491 Both load and store lowering load a block of data aligned on a 16-byte
492 boundary. This is the common aligned load code shared between both.
495 AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
497 unsigned &alignment, int &alignOffs, int &prefSlotOffs,
498 MVT &VT, bool &was16aligned)
500 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
501 const valtype_map_s *vtm = getValueTypeMapEntry(VT);
502 SDOperand basePtr = LSN->getBasePtr();
503 SDOperand chain = LSN->getChain();
// Case 1: base pointer is (base + constant-offset). Split the offset
// into the 16-byte-aligned part and the within-quadword slot offset.
505 if (basePtr.getOpcode() == ISD::ADD) {
506 SDOperand Op1 = basePtr.Val->getOperand(1);
508 if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
509 const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
511 alignOffs = (int) CN->getValue();
512 prefSlotOffs = (int) (alignOffs & 0xf);
514 // Adjust the rotation amount to ensure that the final result ends up in
515 // the preferred slot:
516 prefSlotOffs -= vtm->prefslot_byte;
517 basePtr = basePtr.getOperand(0);
519 // Loading from memory, can we adjust alignment?
520 if (basePtr.getOpcode() == SPUISD::AFormAddr) {
521 SDOperand APtr = basePtr.getOperand(0);
522 if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
523 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
// Take the global's declared alignment as the effective alignment.
524 alignment = GSDN->getGlobal()->getAlignment();
529 prefSlotOffs = -vtm->prefslot_byte;
// Case 2: frame-index base — compute the offset from the slot index
// and rebase on the stack pointer (R1).
531 } else if (basePtr.getOpcode() == ISD::FrameIndex) {
532 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
533 alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
534 prefSlotOffs = (int) (alignOffs & 0xf);
535 prefSlotOffs -= vtm->prefslot_byte;
536 basePtr = DAG.getRegister(SPU::R1, VT);
539 prefSlotOffs = -vtm->prefslot_byte;
// 16-byte-aligned pointer: emit a single aligned v16i8 quadword load.
542 if (alignment == 16) {
543 // Realign the base pointer as a D-Form address:
544 if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
545 basePtr = DAG.getNode(ISD::ADD, PtrVT,
547 DAG.getConstant((alignOffs & ~0xf), PtrVT));
550 // Emit the vector load:
552 return DAG.getLoad(MVT::v16i8, chain, basePtr,
553 LSN->getSrcValue(), LSN->getSrcValueOffset(),
554 LSN->isVolatile(), 16);
557 // Unaligned load or we're using the "large memory" model, which means that
558 // we have to be very pessimistic:
559 if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
560 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));
564 basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
565 DAG.getConstant((alignOffs & ~0xf), PtrVT));
566 was16aligned = false;
567 return DAG.getLoad(MVT::v16i8, chain, basePtr,
568 LSN->getSrcValue(), LSN->getSrcValueOffset(),
569 LSN->isVolatile(), 16);
572 /// Custom lower loads for CellSPU
574 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
575 within a 16-byte block, we have to rotate to extract the requested element.
// Strategy: load the enclosing 16-byte chunk via AlignedLoad, rotate the
// requested element into the preferred slot if needed, extract it as a
// scalar, then apply any required sign/zero extension for i1/i8 loads.
578 LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
579 LoadSDNode *LN = cast<LoadSDNode>(Op);
580 SDOperand the_chain = LN->getChain();
581 MVT VT = LN->getMemoryVT();
582 MVT OpVT = Op.Val->getValueType(0);
583 ISD::LoadExtType ExtType = LN->getExtensionType();
584 unsigned alignment = LN->getAlignment();
// Only UNINDEXED loads are supported; other modes hit the error below.
587 switch (LN->getAddressingMode()) {
588 case ISD::UNINDEXED: {
592 AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);
597 the_chain = result.getValue(1);
598 // Rotate the chunk if necessary
601 if (rotamt != 0 || !was16aligned) {
602 SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
// Rotation amount: an i16 immediate when 16-aligned, otherwise the
// base pointer plus rotamt (byte-rotate by address low bits).
607 Ops[2] = DAG.getConstant(rotamt, MVT::i16);
609 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
610 LoadSDNode *LN1 = cast<LoadSDNode>(result);
611 Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
612 DAG.getConstant(rotamt, PtrVT));
615 result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
616 the_chain = result.getValue(1);
// Extract the scalar from element 0 of the (bit-converted) vector.
619 if (VT == OpVT || ExtType == ISD::EXTLOAD) {
621 MVT vecVT = MVT::v16i8;
623 // Convert the loaded v16i8 vector to the appropriate vector type
624 // specified by the operand:
627 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
629 vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));
632 Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
633 scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
634 result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
635 the_chain = result.getValue(1);
637 // Handle the sign and zero-extending loads for i1 and i8:
640 if (ExtType == ISD::SEXTLOAD) {
641 NewOpC = (OpVT == MVT::i1
642 ? SPUISD::EXTRACT_I1_SEXT
643 : SPUISD::EXTRACT_I8_SEXT);
645 assert(ExtType == ISD::ZEXTLOAD);
646 NewOpC = (OpVT == MVT::i1
647 ? SPUISD::EXTRACT_I1_ZEXT
648 : SPUISD::EXTRACT_I8_ZEXT);
651 result = DAG.getNode(NewOpC, OpVT, result);
// Wrap the value + chain in an LDRESULT node so both are returned.
654 SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
655 SDOperand retops[2] = {
660 result = DAG.getNode(SPUISD::LDRESULT, retvts,
661 retops, sizeof(retops) / sizeof(retops[0]));
668 case ISD::LAST_INDEXED_MODE:
669 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
671 cerr << (unsigned) LN->getAddressingMode() << "\n";
679 /// Custom lower stores for CellSPU
681 All CellSPU stores are aligned to 16-byte boundaries, so for elements
682 within a 16-byte block, we have to generate a shuffle to insert the
683 requested element into its place, then store the resulting block.
// Strategy: read the enclosing 16-byte chunk with AlignedLoad, build an
// insertion mask at the element's slot, SHUFB the new value into the
// chunk, and store the whole quadword back.
686 LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
687 StoreSDNode *SN = cast<StoreSDNode>(Op);
688 SDOperand Value = SN->getValue();
689 MVT VT = Value.getValueType();
// For truncating stores, the type actually written to memory.
690 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
691 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
692 unsigned alignment = SN->getAlignment();
// Only UNINDEXED stores are supported; other modes hit the error below.
694 switch (SN->getAddressingMode()) {
695 case ISD::UNINDEXED: {
696 int chunk_offset, slot_offset;
699 // The vector type we really want to load from the 16-byte chunk, except
700 // in the case of MVT::i1, which has to be v16i8.
701 MVT vecVT, stVecVT = MVT::v16i8;
704 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
705 vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
707 SDOperand alignLoadVec =
708 AlignedLoad(Op, DAG, ST, SN, alignment,
709 chunk_offset, slot_offset, VT, was16aligned);
// Bail out if the aligned chunk load could not be generated.
711 if (alignLoadVec.Val == 0)
714 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
715 SDOperand basePtr = LN->getBasePtr();
716 SDOperand the_chain = alignLoadVec.getValue(1);
717 SDOperand theValue = SN->getValue();
721 && (theValue.getOpcode() == ISD::AssertZext
722 || theValue.getOpcode() == ISD::AssertSext)) {
723 // Drill down and get the value for zero- and sign-extended
725 theValue = theValue.getOperand(0);
730 SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
731 SDOperand insertEltPtr;
732 SDOperand insertEltOp;
734 // If the base pointer is already a D-form address, then just create
735 // a new D-form address with a slot offset and the original base pointer.
736 // Otherwise generate a D-form address with the slot offset relative
737 // to the stack pointer, which is always aligned.
738 DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
739 DEBUG(basePtr.Val->dump(&DAG));
742 if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
743 (basePtr.getOpcode() == ISD::ADD
744 && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
745 insertEltPtr = basePtr;
747 insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
// Merge the scalar into the loaded chunk via an insertion-mask shuffle.
750 insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
751 result = DAG.getNode(SPUISD::SHUFB, vecVT,
752 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
754 DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));
756 result = DAG.getStore(the_chain, result, basePtr,
757 LN->getSrcValue(), LN->getSrcValueOffset(),
758 LN->isVolatile(), LN->getAlignment());
767 case ISD::LAST_INDEXED_MODE:
// FIX: diagnostic previously read "LowerLOAD: Got a LoadSDNode ..." — a
// copy/paste from LowerLOAD; this function handles StoreSDNodes.
768 cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
770 cerr << (unsigned) SN->getAddressingMode() << "\n";
778 /// Generate the address of a constant pool entry.
// Static relocation model only: small-memory mode materializes an A-form
// address directly; large-memory mode builds Hi/Lo halves joined by an
// IndirectAddr node. Non-static models hit the assertion message below.
780 LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
781 MVT PtrVT = Op.getValueType();
782 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
783 Constant *C = CP->getConstVal();
784 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
785 SDOperand Zero = DAG.getConstant(0, PtrVT);
786 const TargetMachine &TM = DAG.getTarget();
788 if (TM.getRelocationModel() == Reloc::Static) {
789 if (!ST->usingLargeMem()) {
790 // Just return the SDOperand with the constant pool address in it.
791 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
793 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
794 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
795 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
800 "LowerConstantPool: Relocation model other than static not supported.");
//! Generate the address of a jump table entry.
// Mirrors LowerConstantPool: A-form address for small memory, Hi/Lo +
// IndirectAddr for large memory; static relocation model only.
805 LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
806 MVT PtrVT = Op.getValueType();
807 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
808 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
809 SDOperand Zero = DAG.getConstant(0, PtrVT);
810 const TargetMachine &TM = DAG.getTarget();
812 if (TM.getRelocationModel() == Reloc::Static) {
813 if (!ST->usingLargeMem()) {
814 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
816 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
817 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
818 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
823 "LowerJumpTable: Relocation model other than static not supported.");
//! Generate the address of a global value.
// Same scheme as LowerConstantPool/LowerJumpTable: A-form address in
// small-memory mode, Hi/Lo + IndirectAddr in large-memory mode; other
// relocation models produce the diagnostic below.
828 LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
829 MVT PtrVT = Op.getValueType();
830 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
831 GlobalValue *GV = GSDN->getGlobal();
832 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
833 const TargetMachine &TM = DAG.getTarget();
834 SDOperand Zero = DAG.getConstant(0, PtrVT);
836 if (TM.getRelocationModel() == Reloc::Static) {
837 if (!ST->usingLargeMem()) {
838 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
840 SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
841 SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
842 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
845 cerr << "LowerGlobalAddress: Relocation model other than static not "
854 //! Custom lower i64 integer constants
856 This code inserts all of the necessary juggling that needs to occur to load
857 a 64-bit constant into a register.
// The i64 constant is splatted into a v2i64 BUILD_VECTOR and element 0
// is extracted; other types reach the diagnostic below.
860 LowerConstant(SDOperand Op, SelectionDAG &DAG) {
861 MVT VT = Op.getValueType();
862 ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);
864 if (VT == MVT::i64) {
865 SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
866 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
867 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
869 cerr << "LowerConstant: unhandled constant type "
879 //! Custom lower double precision floating point constants
// f64 constants are lowered by reinterpreting the double's bit pattern
// as an i64 constant (via LowerConstant) and bit-converting back to f64.
881 LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
882 MVT VT = Op.getValueType();
883 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
886 "LowerConstantFP: Node is not ConstantFPSDNode");
888 if (VT == MVT::f64) {
889 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
890 return DAG.getNode(ISD::BIT_CONVERT, VT,
891 LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
897 //! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
// Rebuilds the BRCOND with the condition zero-extended to i32 (from i1)
// or i16 (from i8); all other condition types are returned unchanged.
899 LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
901 SDOperand Cond = Op.getOperand(1);
902 MVT CondVT = Cond.getValueType();
905 if (CondVT == MVT::i1 || CondVT == MVT::i8) {
906 CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
907 return DAG.getNode(ISD::BRCOND, Op.getValueType(),
909 DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
912 return SDOperand(); // Unchanged
// Lower ISD::FORMAL_ARGUMENTS for the SPU calling convention.
//
// Each incoming argument is either copied out of its designated argument
// register (one register per argument, indexed by ArgRegIdx, using the
// register class matching the value type) or — once registers run out, or
// for vararg functions — loaded from a fixed stack object at ArgOffset.
// For vararg functions, the remaining argument registers are spilled to
// the stack so va_arg can walk them; VarArgsFrameIndex records where that
// area starts. Results are merged with MERGE_VALUES (arg values + chain).
916 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
918 MachineFunction &MF = DAG.getMachineFunction();
919 MachineFrameInfo *MFI = MF.getFrameInfo();
920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
921 SmallVector<SDOperand, 8> ArgValues;
922 SDOperand Root = Op.getOperand(0);
923 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
925 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
926 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack-passed arguments start just above the minimal linkage area.
928 unsigned ArgOffset = SPUFrameInfo::minStackSize();
929 unsigned ArgRegIdx = 0;
930 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
932 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
934 // Add DAG nodes to load the arguments or copy them out of registers.
// Last node value is the chain, hence getNumValues()-1 argument values.
935 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
937 bool needsLoad = false;
938 MVT ObjectVT = Op.getValue(ArgNo).getValueType();
939 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
// Dispatch on the argument's value type to pick the SPU register class.
941 switch (ObjectVT.getSimpleVT()) {
943 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
944 << ObjectVT.getMVTString()
// i8 arguments live in the R8C register class.
949 if (!isVarArg && ArgRegIdx < NumArgRegs) {
950 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
951 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
952 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
// i16 arguments: R16C register class.
959 if (!isVarArg && ArgRegIdx < NumArgRegs) {
960 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
961 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
962 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
// i32 arguments: R32C register class.
969 if (!isVarArg && ArgRegIdx < NumArgRegs) {
970 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
971 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
972 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
// i64 arguments: R64C register class.
979 if (!isVarArg && ArgRegIdx < NumArgRegs) {
980 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
981 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
982 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
// f32 arguments: R32FP register class.
989 if (!isVarArg && ArgRegIdx < NumArgRegs) {
990 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
991 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
992 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
// f64 arguments: R64FP register class.
999 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1000 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
1001 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1002 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments: full 128-bit VECREG class, copied at their own type.
1014 if (!isVarArg && ArgRegIdx < NumArgRegs) {
1015 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1016 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1017 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
1025 // We need to load the argument to a virtual register if we determined above
1026 // that we ran out of physical registers of the appropriate type
// Create a fixed stack object for the argument and load it; each argument
// consumes a full stack slot regardless of its size.
1028 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1029 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
1030 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
1031 ArgOffset += StackSlotSize;
1034 ArgValues.push_back(ArgVal);
1037 // If the function takes variable number of arguments, make a frame index for
1038 // the start of the first vararg value... for expansion of llvm.va_start.
1040 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
1042 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1043 // If this function is vararg, store any remaining integer argument regs to
1044 // their spots on the stack so that they may be loaded by deferencing the
1045 // result of va_next.
1046 SmallVector<SDOperand, 8> MemOps;
1047 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
// Spill each unused argument register (as a pointer-sized GPRC value).
1048 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1049 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1050 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1051 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1052 MemOps.push_back(Store);
1053 // Increment the address by four for the next argument to store
1054 SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
1055 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Tie all vararg spill stores into the chain with a TokenFactor.
1057 if (!MemOps.empty())
1058 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
// The chain is the final result value, matching the node's value list.
1061 ArgValues.push_back(Root);
1063 // Return the new list of results.
1064 std::vector<MVT> RetVT(Op.Val->value_begin(),
1065 Op.Val->value_end());
1066 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1069 /// isLSAAddress - Return the immediate to use if the specified
1070 /// value is representable as a LSA address.
//
// Returns a Constant node holding the word-address (byte address >> 2) when
// Op is a constant whose low 2 bits are zero and whose value fits in a
// sign-extended 18-bit field (top 14 bits must be the sign extension);
// otherwise returns 0 (also for non-constant operands).
1071 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1072 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1075 int Addr = C->getValue();
1076 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1077 (Addr << 14 >> 14) != Addr)
1078 return 0; // Top 14 bits have to be sext of immediate.
// Encode as a word offset (drop the two implicit zero bits).
1080 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// Lower ISD::CALL for the SPU.
//
// Outgoing arguments are assigned to the SPU argument registers in order;
// once they run out, arguments are stored to stack slots above the linkage
// area. The callee address is rewritten to a target-specific addressing
// node (PC-relative BRSL-style, A-form BRASL-style, or X-form indirect for
// "large memory" mode). After the call, return values are copied out of
// R3 (and R4 for a split two-register result) according to the node's
// result type, and everything is merged with MERGE_VALUES.
// NOTE(review): isTailCall is computed but no tail-call path is visible in
// this excerpt — presumably tail calls are not optimized here; confirm.
1085 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1086 SDOperand Chain = Op.getOperand(0);
1088 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1089 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1091 SDOperand Callee = Op.getOperand(4);
// Operands come in (value, flag) pairs after the first five fixed operands.
1092 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1093 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1094 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1095 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1097 // Handy pointer type
1098 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1100 // Accumulate how many bytes are to be pushed on the stack, including the
1101 // linkage area, and parameter passing area. According to the SPU ABI,
1102 // we minimally need space for [LR] and [SP]
1103 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1105 // Set up a copy of the stack pointer for use loading and storing any
1106 // arguments that may not fit in the registers available for argument
1108 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1110 // Figure out which arguments are going to go in registers, and which in
1112 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1113 unsigned ArgRegIdx = 0;
1115 // Keep track of registers passing arguments
1116 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1117 // And the arguments passed on the stack
1118 SmallVector<SDOperand, 8> MemOpChains;
1120 for (unsigned i = 0; i != NumOps; ++i) {
1121 SDOperand Arg = Op.getOperand(5+2*i);
1123 // PtrOff will be used to store the current argument to the stack if a
1124 // register cannot be found for it.
1125 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1126 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
// Register if one is left, otherwise a stack slot; the three visible
// branches below handle the different value-type case groups identically.
1128 switch (Arg.getValueType().getSimpleVT()) {
1129 default: assert(0 && "Unexpected ValueType for argument!");
1133 if (ArgRegIdx != NumArgRegs) {
1134 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1136 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1137 ArgOffset += StackSlotSize;
1142 if (ArgRegIdx != NumArgRegs) {
1143 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1145 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1146 ArgOffset += StackSlotSize;
1153 if (ArgRegIdx != NumArgRegs) {
1154 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1156 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1157 ArgOffset += StackSlotSize;
1163 // Update number of stack bytes actually used, insert a call sequence start
1164 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1165 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1167 if (!MemOpChains.empty()) {
1168 // Adjust the stack pointer for the stack arguments.
1169 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1170 &MemOpChains[0], MemOpChains.size());
1173 // Build a sequence of copy-to-reg nodes chained together with token chain
1174 // and flag operands which copy the outgoing args into the appropriate regs.
1176 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1177 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1179 InFlag = Chain.getValue(1);
1182 std::vector<MVT> NodeTys;
1183 NodeTys.push_back(MVT::Other); // Returns a chain
1184 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1186 SmallVector<SDOperand, 8> Ops;
1187 unsigned CallOpc = SPUISD::CALL;
1189 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1190 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1191 // node so that legalize doesn't hack it.
1192 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1193 GlobalValue *GV = G->getGlobal();
1194 MVT CalleeVT = Callee.getValueType();
1195 SDOperand Zero = DAG.getConstant(0, PtrVT);
1196 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1198 if (!ST->usingLargeMem()) {
1199 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1200 // style calls, otherwise, external symbols are BRASL calls. This assumes
1201 // that declared/defined symbols are in the same compilation unit and can
1202 // be reached through PC-relative jumps.
1205 // This may be an unsafe assumption for JIT and really large compilation
1207 if (GV->isDeclaration()) {
1208 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1210 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1213 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1215 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
1217 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1218 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1219 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1220 // If this is an absolute destination address that appears to be a legal
1221 // local store address, use the munged value.
1222 Callee = SDOperand(Dest, 0);
// Assemble the call node operands: chain, callee, then argument registers.
1225 Ops.push_back(Chain);
1226 Ops.push_back(Callee);
1228 // Add argument registers to the end of the list so that they are known live
1230 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1231 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1232 RegsToPass[i].second.getValueType()));
1235 Ops.push_back(InFlag);
1236 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1237 InFlag = Chain.getValue(1);
1239 Chain = DAG.getCALLSEQ_END(Chain,
1240 DAG.getConstant(NumStackBytes, PtrVT),
1241 DAG.getConstant(0, PtrVT),
// Only carry the flag forward when the call actually produces a value.
1243 if (Op.Val->getValueType(0) != MVT::Other)
1244 InFlag = Chain.getValue(1);
1246 SDOperand ResultVals[3];
1247 unsigned NumResults = 0;
1250 // If the call has results, copy the values out of the ret val registers.
1251 switch (Op.Val->getValueType(0).getSimpleVT()) {
1252 default: assert(0 && "Unexpected ret value!");
1253 case MVT::Other: break;
// Two-register i32 pair result: low part in R4, high part in R3.
1255 if (Op.Val->getValueType(1) == MVT::i32) {
1256 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1257 ResultVals[0] = Chain.getValue(0);
1258 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1259 Chain.getValue(2)).getValue(1);
1260 ResultVals[1] = Chain.getValue(0);
1262 NodeTys.push_back(MVT::i32);
// Single i32 result in R3.
1264 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1265 ResultVals[0] = Chain.getValue(0);
1268 NodeTys.push_back(MVT::i32);
// i64 result in R3.
1271 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1272 ResultVals[0] = Chain.getValue(0);
1274 NodeTys.push_back(MVT::i64);
// Floating-point result in R3, at the call's own result type.
1278 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1279 InFlag).getValue(1);
1280 ResultVals[0] = Chain.getValue(0);
1282 NodeTys.push_back(Op.Val->getValueType(0));
// Vector result in R3, at the call's own result type.
1289 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1290 InFlag).getValue(1);
1291 ResultVals[0] = Chain.getValue(0);
1293 NodeTys.push_back(Op.Val->getValueType(0));
1297 NodeTys.push_back(MVT::Other);
1299 // If the function returns void, just return the chain.
1300 if (NumResults == 0)
1303 // Otherwise, merge everything together with a MERGE_VALUES node.
1304 ResultVals[NumResults++] = Chain;
1305 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1306 ResultVals, NumResults);
1307 return Res.getValue(Op.ResNo);
// Lower ISD::RET: run the return values through the RetCC_SPU calling
// convention, record the return registers as function live-outs (first
// return lowered only), copy each value into its assigned register with a
// flag-chained CopyToReg, and emit SPUISD::RET_FLAG.
1311 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1312 SmallVector<CCValAssign, 16> RVLocs;
1313 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1314 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1315 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1316 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1318 // If this is the first return lowered for this function, add the regs to the
1319 // liveout set for the function.
1320 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1321 for (unsigned i = 0; i != RVLocs.size(); ++i)
1322 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1325 SDOperand Chain = Op.getOperand(0);
1328 // Copy the result values into the output registers.
1329 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1330 CCValAssign &VA = RVLocs[i];
1331 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands are (chain, value, signness, value, signness, ...), hence
// the i*2+1 indexing to pick out the values.
1332 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1333 Flag = Chain.getValue(1);
// Emit the return with the glue flag when values were copied, without it
// for a void return.
1337 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1339 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1343 //===----------------------------------------------------------------------===//
1344 // Vector related lowering:
1345 //===----------------------------------------------------------------------===//
// getVecImm - If the BUILD_VECTOR node N is a splat of one constant value
// (ignoring UNDEF elements), return that element's ConstantSDNode;
// return 0 for an all-UNDEF vector or when elements differ / are not
// constants (the elided lines presumably bail out on mismatch).
1347 static ConstantSDNode *
1348 getVecImm(SDNode *N) {
1349 SDOperand OpVal(0, 0);
1351 // Check to see if this buildvec has a single non-undef value in its elements.
1352 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1353 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// Remember the first non-undef element; all others must match it.
1355 OpVal = N->getOperand(i);
1356 else if (OpVal != N->getOperand(i))
1360 if (OpVal.Val != 0) {
1361 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1366 return 0; // All UNDEF: use implicit def.; not Constant node
1369 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1370 /// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant (for i64 splats, the upper and lower 32-bit halves must match —
/// the elided lines presumably compare upper/lower before shifting).
1372 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1374 if (ConstantSDNode *CN = getVecImm(N)) {
1375 uint64_t Value = CN->getValue();
1376 if (ValueType == MVT::i64) {
1377 uint64_t UValue = CN->getValue();
1378 uint32_t upper = uint32_t(UValue >> 32);
1379 uint32_t lower = uint32_t(UValue);
// Reduce the 64-bit splat to its (identical) 32-bit half for range test.
1382 Value = Value >> 32;
// u18 immediate range: [0, 2^18).
1384 if (Value <= 0x3ffff)
1385 return DAG.getConstant(Value, ValueType);
1391 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1392 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant (for i64 splats, the two 32-bit halves must match — presumably
/// checked in the elided lines before the shift).
1394 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1396 if (ConstantSDNode *CN = getVecImm(N)) {
1397 int64_t Value = CN->getSignExtended();
1398 if (ValueType == MVT::i64) {
1399 uint64_t UValue = CN->getValue();
1400 uint32_t upper = uint32_t(UValue >> 32);
1401 uint32_t lower = uint32_t(UValue);
// Reduce the 64-bit splat to its (identical) 32-bit half for range test.
1404 Value = Value >> 32;
// Signed 16-bit immediate range: [-32768, 32767].
1406 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1407 return DAG.getConstant(Value, ValueType);
1414 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1415 /// and the value fits into a signed 10-bit constant, and if so, return the
/// constant (range check delegated to isS10Constant; i64 splats require
/// matching 32-bit halves, presumably verified in the elided lines).
1417 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1419 if (ConstantSDNode *CN = getVecImm(N)) {
1420 int64_t Value = CN->getSignExtended();
1421 if (ValueType == MVT::i64) {
1422 uint64_t UValue = CN->getValue();
1423 uint32_t upper = uint32_t(UValue >> 32);
1424 uint32_t lower = uint32_t(UValue);
// Reduce the 64-bit splat to its (identical) 32-bit half for range test.
1427 Value = Value >> 32;
1429 if (isS10Constant(Value))
1430 return DAG.getConstant(Value, ValueType);
1436 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1437 /// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
1440 /// @note: The incoming vector is v16i8 because that's the only way we can load
1441 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same (i16 case) before accepting the low byte as the immediate.
1443 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1445 if (ConstantSDNode *CN = getVecImm(N)) {
1446 int Value = (int) CN->getValue();
// i16 splat of a repeated byte: high byte must equal low byte.
1447 if (ValueType == MVT::i16
1448 && Value <= 0xffff /* truncated from uint64_t */
1449 && ((short) Value >> 8) == ((short) Value & 0xff))
1450 return DAG.getConstant(Value & 0xff, ValueType);
// i8 splat: value must already fit in a byte.
1451 else if (ValueType == MVT::i8
1452 && (Value & 0xff) == Value)
1453 return DAG.getConstant(Value, ValueType);
1459 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1460 /// and the value fits into a signed 16-bit constant, and if so, return the
/// constant's upper 16 bits — i.e. the value must have all-zero low 16 bits
/// so it can be materialized with ILHU (immediate-load-halfword-upper).
1462 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1464 if (ConstantSDNode *CN = getVecImm(N)) {
1465 uint64_t Value = CN->getValue();
// Accept only values whose low 16 bits are zero (and, for i64, whose
// upper 32 bits are zero as well, per the 0xffff0000 mask).
1466 if ((ValueType == MVT::i32
1467 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1468 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1469 return DAG.getConstant(Value >> 16, ValueType);
1475 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
/// Returns the splat value as an i32 constant, or (presumably, in the
/// elided fall-through) an empty SDOperand when N is not a constant splat.
1476 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1477 if (ConstantSDNode *CN = getVecImm(N)) {
1478 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1484 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
/// Returns the splat value as an i64 constant.
/// NOTE(review): the (unsigned) cast truncates the 64-bit splat value to
/// 32 bits before widening back to i64 — looks unintentional; confirm
/// whether callers only feed values whose upper half is zero.
1485 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1486 if (ConstantSDNode *CN = getVecImm(N)) {
1487 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1493 // If this is a vector of constants or undefs, get the bits. A bit in
1494 // UndefBits is set if the corresponding element of the vector is an
1495 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1496 // zero. Return true if this is not an array of constants, false if it is.
//
// The 128-bit vector is packed into two uint64_t words; each element's bits
// are masked to its element width and shifted into its slot.
1498 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1499 uint64_t UndefBits[2]) {
1500 // Start with zero'd results.
1501 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1503 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1504 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1505 SDOperand OpVal = BV->getOperand(i);
1507 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1508 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1510 uint64_t EltBits = 0;
1511 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark the whole element's bit range as undef; leave its value bits 0.
1512 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1513 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1515 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1516 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1517 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP constants contribute their raw IEEE bit patterns.
1518 const APFloat &apf = CN->getValueAPF();
1519 EltBits = (CN->getValueType(0) == MVT::f32
1520 ? FloatToBits(apf.convertToFloat())
1521 : DoubleToBits(apf.convertToDouble()));
1523 // Nonconstant element.
1527 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1530 //printf("%llx %llx %llx %llx\n",
1531 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1535 /// If this is a splat (repetition) of a value across the whole vector, return
1536 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1537 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1538 /// SplatSize = 1 byte.
//
// Works by successively folding the value in half (64 -> 32 -> 16 -> 8
// bits), each time checking that the two halves agree modulo undef bits;
// MinSplatBits bounds how small a splat the caller will accept.
1539 static bool isConstantSplat(const uint64_t Bits128[2],
1540 const uint64_t Undef128[2],
1542 uint64_t &SplatBits, uint64_t &SplatUndef,
1544 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1545 // the same as the lower 64-bits, ignoring undefs.
1546 uint64_t Bits64 = Bits128[0] | Bits128[1];
1547 uint64_t Undef64 = Undef128[0] & Undef128[1];
1548 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1549 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1550 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1551 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// 128-bit halves must agree where neither half is undef.
1553 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1554 if (MinSplatBits < 64) {
1556 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1558 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1559 if (MinSplatBits < 32) {
1561 // If the top 16-bits are different than the lower 16-bits, ignoring
1562 // undefs, we have an i32 splat.
1563 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1564 if (MinSplatBits < 16) {
1565 // If the top 8-bits are different than the lower 8-bits, ignoring
1566 // undefs, we have an i16 splat.
1567 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1568 // Otherwise, we have an 8-bit splat.
1569 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1570 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
// Halves differ at 8 bits: settle for a 16-bit splat.
1576 SplatUndef = Undef16;
// Halves differ at 16 bits: settle for a 32-bit splat.
1583 SplatUndef = Undef32;
// Halves differ at 32 bits (or MinSplatBits >= 64): 64-bit splat.
1589 SplatBits = Bits128[0];
1590 SplatUndef = Undef128[0];
1596 return false; // Can't be a splat if two pieces don't match.
1599 // If this is a case we can't handle, return null and let the default
1600 // expansion code take care of it. If we CAN select this case, and if it
1601 // selects to a single instruction, return Op. Otherwise, if we can codegen
1602 // this case more efficiently than a constant pool load, lower it to the
1603 // sequence of ops that should be used.
1604 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1605 MVT VT = Op.getValueType();
1606 // If this is a vector of constants or undefs, get the bits. A bit in
1607 // UndefBits is set if the corresponding element of the vector is an
1608 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1610 uint64_t VectorBits[2];
1611 uint64_t UndefBits[2];
1612 uint64_t SplatBits, SplatUndef;
// Only constant splats are handled; everything else falls back to the
// default expansion.
1614 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1615 || !isConstantSplat(VectorBits, UndefBits,
1616 VT.getVectorElementType().getSizeInBits(),
1617 SplatBits, SplatUndef, SplatSize))
1618 return SDOperand(); // Not a constant vector, not a splat.
1620 switch (VT.getSimpleVT()) {
// v4f32: treat the splat as an i32 bit pattern and bitcast, since FP
// constants cannot be loaded directly.
1623 uint32_t Value32 = SplatBits;
1624 assert(SplatSize == 4
1625 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1626 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1627 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1628 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1629 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64: same trick with an i64 bit pattern.
1633 uint64_t f64val = SplatBits;
1634 assert(SplatSize == 8
1635 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1636 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1637 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1638 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1639 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
// v16i8: widen the byte splat to a repeated-byte i16 splat and bitcast.
1643 // 8-bit constants have to be expanded to 16-bits
1644 unsigned short Value16 = SplatBits | (SplatBits << 8);
1646 for (int i = 0; i < 8; ++i)
1647 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1648 return DAG.getNode(ISD::BIT_CONVERT, VT,
1649 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// v8i16: use the 16-bit splat directly, or replicate a byte splat into
// both halves of each element.
1652 unsigned short Value16;
1654 Value16 = (unsigned short) (SplatBits & 0xffff);
1656 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1657 SDOperand T = DAG.getConstant(Value16, VT.getVectorElementType());
1659 for (int i = 0; i < 8; ++i) Ops[i] = T;
1660 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
// v4i32: straightforward four-way splat of the 32-bit value.
1663 unsigned int Value = SplatBits;
1664 SDOperand T = DAG.getConstant(Value, VT.getVectorElementType());
1665 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64: split into 32-bit halves to decide the cheapest materialization.
1668 uint64_t val = SplatBits;
1669 uint32_t upper = uint32_t(val >> 32);
1670 uint32_t lower = uint32_t(val);
1672 if (upper == lower) {
1673 // Magic constant that can be matched by IL, ILA, et. al.
1674 SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1675 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1679 SmallVector<SDOperand, 16> ShufBytes;
1681 bool upper_special, lower_special;
1683 // NOTE: This code creates common-case shuffle masks that can be easily
1684 // detected as common expressions. It is not attempting to create highly
1685 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1687 // Detect if the upper or lower half is a special shuffle mask pattern:
1688 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1689 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1691 // Create lower vector if not a special pattern
1692 if (!lower_special) {
1693 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1694 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1695 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1696 LO32C, LO32C, LO32C, LO32C));
1699 // Create upper vector if not a special pattern
1700 if (!upper_special) {
1701 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1702 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1703 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1704 HI32C, HI32C, HI32C, HI32C));
1707 // If either upper or lower are special, then the two input operands are
1708 // the same (basically, one of them is a "don't care")
1713 if (lower_special && upper_special) {
1714 // Unhappy situation... both upper and lower are special, so punt with
1715 // a target constant:
1716 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1717 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build the 16-byte SHUFB control word, one 32-bit word per vector slot.
1721 for (int i = 0; i < 4; ++i) {
1723 for (int j = 0; j < 4; ++j) {
1725 bool process_upper, process_lower;
// Even words come from the upper half, odd words from the lower half.
1727 process_upper = (upper_special && (i & 1) == 0);
1728 process_lower = (lower_special && (i & 1) == 1);
1730 if (process_upper || process_lower) {
// Special patterns map to SHUFB's generator codes: zeros, ones,
// and 0x80-in-byte-0 (0xe0 selects the 0x80 generator for byte 0).
1731 if ((process_upper && upper == 0)
1732 || (process_lower && lower == 0))
1734 else if ((process_upper && upper == 0xffffffff)
1735 || (process_lower && lower == 0xffffffff))
1737 else if ((process_upper && upper == 0x80000000)
1738 || (process_lower && lower == 0x80000000))
1739 val |= (j == 0 ? 0xe0 : 0x80);
// Ordinary byte: select from the corresponding input vector byte.
1741 val |= i * 4 + j + ((i & 1) * 16);
1744 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1747 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1748 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1749 &ShufBytes[0], ShufBytes.size()));
1757 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1758 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1759 /// permutation vector, V3, is monotonically increasing with one "exception"
1760 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1761 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1762 /// In either case, the net result is going to eventually invoke SHUFB to
1763 /// permute/shuffle the bytes from V1 and V2.
1765 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1766 /// control word for byte/halfword/word insertion. This takes care of a single
1767 /// element move from V2 into V1.
1769 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1770 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1771 SDOperand V1 = Op.getOperand(0);
1772 SDOperand V2 = Op.getOperand(1);
1773 SDOperand PermMask = Op.getOperand(2);
// Shuffling with an undef second operand degenerates to a one-input shuffle.
1775 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1777 // If we have a single element being moved from V1 to V2, this can be handled
1778 // using the C*[DX] compute mask instructions, but the vector elements have
1779 // to be monotonically increasing with one exception element.
1780 MVT EltVT = V1.getValueType().getVectorElementType();
1781 unsigned EltsFromV2 = 0;
1783 unsigned V2EltIdx0 = 0;
1784 unsigned CurrElt = 0;
1785 bool monotonic = true;
// V2's elements start at index 16/8/4 in the mask depending on element size.
1786 if (EltVT == MVT::i8)
1788 else if (EltVT == MVT::i16)
1790 else if (EltVT == MVT::i32)
1793 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask: count elements sourced from V2 (at most one allowed) and
// verify the V1 elements appear in monotonically increasing order.
1795 for (unsigned i = 0, e = PermMask.getNumOperands();
1796 EltsFromV2 <= 1 && monotonic && i != e;
1799 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1802 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1804 if (SrcElt >= V2EltIdx0) {
// Record the byte offset of the single V2 element being inserted.
1806 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1807 } else if (CurrElt != SrcElt) {
1814 if (EltsFromV2 == 1 && monotonic) {
1815 // Compute mask and shuffle
1816 MachineFunction &MF = DAG.getMachineFunction();
1817 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1818 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1819 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1820 // Initialize temporary register to 0
1821 SDOperand InitTempReg =
1822 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1823 // Copy register's contents as index in INSERT_MASK:
1824 SDOperand ShufMaskOp =
1825 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1826 DAG.getTargetConstant(V2Elt, MVT::i32),
1827 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1828 // Use shuffle mask in SHUFB synthetic instruction:
1829 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
// General case: expand the element-level mask to a v16i8 byte-level
// permutation vector and emit a plain SHUFB.
1831 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1832 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1834 SmallVector<SDOperand, 16> ResultMask;
1835 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1837 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1840 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1842 for (unsigned j = 0; j < BytesPerElement; ++j) {
1843 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1848 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1849 &ResultMask[0], ResultMask.size());
1850 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// Lower ISD::SCALAR_TO_VECTOR: a constant scalar becomes an explicit
// BUILD_VECTOR of n copies (which later folds into a vector register load);
// a non-constant scalar is promoted into a vector register via
// SPUISD::PROMOTE_SCALAR.
1854 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1855 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1857 if (Op0.Val->getOpcode() == ISD::Constant) {
1858 // For a constant, build the appropriate constant vector, which will
1859 // eventually simplify to a vector register load.
1861 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1862 SmallVector<SDOperand, 16> ConstVecValues;
1866 // Create a constant vector:
// Pick the element count/type from the destination vector type.
1867 switch (Op.getValueType().getSimpleVT()) {
1868 default: assert(0 && "Unexpected constant value type in "
1869 "LowerSCALAR_TO_VECTOR");
1870 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1871 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1872 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1873 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1874 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1875 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1878 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1879 for (size_t j = 0; j < n_copies; ++j)
1880 ConstVecValues.push_back(CValue);
1882 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1883 &ConstVecValues[0], ConstVecValues.size());
1885 // Otherwise, copy the value from one register to another:
1886 switch (Op0.getValueType().getSimpleVT()) {
1887 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// The scalar is passed twice; presumably PROMOTE_SCALAR's selection only
// consumes the first operand — confirm against the instruction patterns.
1894 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1901 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
// Custom-lower vector ISD::MUL: SPU has no full-width vector multiply, so
// each element type is built from 16-bit partial products (mpyh/mpyu/mpy)
// that are recombined with adds, shifts and SELB selects.
1902   switch (Op.getValueType().getSimpleVT()) {
// Unknown vector type: report the type before aborting (abort elided here).
1904     cerr << "CellSPU: Unknown vector multiplication, got "
1905          << Op.getValueType().getMVTString()
// v4i32: 32x32->32 from 16-bit halves. mpyh(rA,rB) and mpyh(rB,rA) supply
// the two cross terms (shifted into the high half), mpyu the low-half
// product; their sum is the low 32 bits of the full product.
1911     SDOperand rA = Op.getOperand(0);
1912     SDOperand rB = Op.getOperand(1);
1913     SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1914     SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1915     SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1916     SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1918     return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1922   // Multiply two v8i16 vectors (pipeline friendly version):
1923   // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1924   // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1925   // c) Use SELB to select upper and lower halves from the intermediate results
1927   // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1928   // dual-issue. This code does manage to do this, even if it's a little on
1931     MachineFunction &MF = DAG.getMachineFunction();
1932     MachineRegisterInfo &RegInfo = MF.getRegInfo();
1933     SDOperand Chain = Op.getOperand(0);
// NOTE(review): Chain aliases Op.getOperand(0), the same node used as the
// first multiplicand (rA) below -- using a value operand as a chain looks
// suspicious; confirm this is intentional.
1934     SDOperand rA = Op.getOperand(0);
1935     SDOperand rB = Op.getOperand(1);
// Pin the FSMBI select mask and the high product in virtual registers so
// their defining copies can be scheduled early (see dual-issue NOTE above).
1936     unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1937     unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// 0xcccc expands (via SELECT_MASK) to a byte mask picking alternate
// 16-bit lanes for the SELB below.
1940       DAG.getCopyToReg(Chain, FSMBIreg,
1941                        DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1942                                    DAG.getConstant(0xcccc, MVT::i16)));
1945       DAG.getCopyToReg(FSMBOp, HiProdReg,
1946                        DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1948     SDOperand HHProd_v4i32 =
1949       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1950                   DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// Merge: low product from mpy, high product shifted left 16, selected by
// the FSMBI mask computed above.
1952     return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1953                        DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1954                        DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1955                                    DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1957                                                DAG.getConstant(16, MVT::i16))),
1958                        DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1961   // This M00sE is N@stI! (apologies to Monty Python)
1963   // SPU doesn't know how to do any 8-bit multiplication, so the solution
1964   // is to break it all apart, sign extend, and reassemble the various
1965   // intermediate products.
1967     SDOperand rA = Op.getOperand(0);
1968     SDOperand rB = Op.getOperand(1);
1969     SDOperand c8 = DAG.getConstant(8, MVT::i32);
1970     SDOperand c16 = DAG.getConstant(16, MVT::i32);
// Low-low byte products, computed as 16-bit lane multiplies.
1973       DAG.getNode(SPUISD::MPY, MVT::v8i16,
1974                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1975                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Arithmetic-shift right by 8 sign-extends the high byte of each 16-bit
// lane into a full lane value.
1977     SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1979     SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
// High-byte products, shifted back up into the high byte position.
1982       DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1983                   DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// 0x2222 selects every other byte lane when expanded by SELECT_MASK.
1985     SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1986                                      DAG.getConstant(0x2222, MVT::i16));
1988     SDOperand LoProdParts =
1989       DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1990                   DAG.getNode(SPUISD::SELB, MVT::v8i16,
1991                               LLProd, LHProd, FSMBmask));
// Keep only the low 16 bits of each 32-bit lane of the combined low parts.
1993     SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1996       DAG.getNode(ISD::AND, MVT::v4i32,
1998                   DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1999                               LoProdMask, LoProdMask,
2000                               LoProdMask, LoProdMask));
// Sign-extend the upper 16 bits of each 32-bit lane for the high products.
2003       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2004                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2007       DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2008                   DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2011       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2012                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2013                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2015     SDOperand HHProd_1 =
2016       DAG.getNode(SPUISD::MPY, MVT::v8i16,
2017                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2018                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2019                   DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2020                               DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2023       DAG.getNode(SPUISD::SELB, MVT::v8i16,
2025                   DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
// Position the high products in the upper 16 bits of each 32-bit lane.
2029       DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
// Final reassembly: OR the low and high halves, view the result as v16i8.
2031     return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2032                        DAG.getNode(ISD::OR, MVT::v4i32,
2040 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
// Custom-lower f32 (and v4f32) FDIV: SPU has no divide instruction, so
// A/B is computed from a reciprocal estimate of B plus one refinement:
//   C = A * Brcp;  result = C + Brcp * (A - B * C)
// where Brcp = fi(B, frest(B)) (interpolated reciprocal estimate).
2041 MachineFunction &MF = DAG.getMachineFunction();
2042 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2044 SDOperand A = Op.getOperand(0);
2045 SDOperand B = Op.getOperand(1);
2046 MVT VT = Op.getValueType();
2048 unsigned VRegBR, VRegC;
// Scalar f32 lives in the FP register class; the vector form (else arm,
// header elided) uses the full vector register class.
2050 if (VT == MVT::f32) {
2051 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2052 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2054 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2055 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2057 // TODO: make sure we're feeding FPInterp the right arguments
2058 // Right now: fi B, frest(B)
2061 // (Floating Interpolate (FP Reciprocal Estimate B))
// BRcpl: copy the reciprocal estimate into VRegBR; the copy node doubles
// as the chain for the subsequent CopyTo/FromReg nodes.
2063 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2064 DAG.getNode(SPUISD::FPInterp, VT, B,
2065 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2067 // Computes A * BRcpl and stores in a temporary register
2069 DAG.getCopyToReg(BRcpl, VRegC,
2070 DAG.getNode(ISD::FMUL, VT, A,
2071 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2072 // What's the Chain variable do? It's magic!
2073 // TODO: set Chain = Op(0).getEntryNode()
// Refinement step: C + Brcp*(A - B*C). The CopyFromReg reads are chained
// off AxBRcpl so they occur after both register writes.
2075 return DAG.getNode(ISD::FADD, VT,
2076 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2077 DAG.getNode(ISD::FMUL, VT,
2078 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2079 DAG.getNode(ISD::FSUB, VT, A,
2080 DAG.getNode(ISD::FMUL, VT, B,
2081 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2084 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
// Custom-lower EXTRACT_VECTOR_ELT with a constant index: shuffle the
// requested element into the register's "preferred slot" (where scalars
// live on SPU), then read it out with SPUISD::EXTRACT_ELT0.
2085 MVT VT = Op.getValueType();
2086 SDOperand N = Op.getOperand(0);
2087 SDOperand Elt = Op.getOperand(1);
2088 SDOperand ShufMask[16];
2089 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
// Only constant element indices are supported by this lowering.
2091 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2093 int EltNo = (int) C->getValue();
// sanity checks
// NOTE(review): the i32/i64 assert strings say "slot > 4" / "slot > 2"
// but the conditions fire on >= 4 / >= 2; messages should read "> 3" /
// "> 1" to match the checks.
2096 if (VT == MVT::i8 && EltNo >= 16)
2097 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2098 else if (VT == MVT::i16 && EltNo >= 8)
2099 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2100 else if (VT == MVT::i32 && EltNo >= 4)
2101 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2102 else if (VT == MVT::i64 && EltNo >= 2)
2103 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
// Fast path: element 0 already occupies the preferred slot for i32/i64.
2105 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2106 // i32 and i64: Element 0 is the preferred slot
2107 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2110 // Need to generate shuffle mask and extract:
2111 int prefslot_begin = -1, prefslot_end = -1;
// Byte offset of the requested element within the 16-byte register.
2112 int elt_byte = EltNo * VT.getSizeInBits() / 8;
// Preferred-slot byte range per scalar type (case labels elided here).
2114 switch (VT.getSimpleVT()) {
2116 assert(false && "Invalid value type!");
2118 prefslot_begin = prefslot_end = 3;
2122 prefslot_begin = 2; prefslot_end = 3;
2126 prefslot_begin = 0; prefslot_end = 3;
2130 prefslot_begin = 0; prefslot_end = 7;
2135 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2136 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build a 16-byte shufb mask that copies the element's bytes into the
// preferred slot; bytes past the slot just repeat the earlier pattern
// (their contents are don't-care).
2138 for (int i = 0; i < 16; ++i) {
2139 // zero fill uppper part of preferred slot, don't care about the
2141 unsigned int mask_val;
2143 if (i <= prefslot_end) {
2145 ((i < prefslot_begin)
2147 : elt_byte + (i - prefslot_begin));
2149 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2151 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2154 SDOperand ShufMaskVec =
2155 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2157 sizeof(ShufMask) / sizeof(ShufMask[0]));
// shufb N with itself through the mask, then read the preferred slot.
2159 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2160 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2161 N, N, ShufMaskVec));
2165 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
// Custom-lower INSERT_VECTOR_ELT with a constant index: promote the
// scalar to a vector and shufb it into VecOp using an INSERT_MASK
// (cwd/chd/cwx-style control word) derived from an aligned address plus
// the element's byte offset.
2166 SDOperand VecOp = Op.getOperand(0);
2167 SDOperand ValOp = Op.getOperand(1);
2168 SDOperand IdxOp = Op.getOperand(2);
2169 MVT VT = Op.getValueType();
// The index must be a compile-time constant (cast would assert otherwise).
2171 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2172 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2174 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2175 // Use $2 because it's always 16-byte aligned and it's available:
2176 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
// shufb(scalar-as-vector, VecOp, insert-mask) merges ValOp into the
// element slot selected by PtrBase + index (intervening lines elided).
2179 DAG.getNode(SPUISD::SHUFB, VT,
2180 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2182 DAG.getNode(SPUISD::INSERT_MASK, VT,
2183 DAG.getNode(ISD::ADD, PtrVT,
2185 DAG.getConstant(CN->getValue(),
2191 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
// Custom-lower i8 arithmetic: SPU's smallest ALU width for these ops is
// 16 bits, so each arm promotes the operands to i16 (sign- or zero-extend
// chosen per operation, constants re-materialized directly as i16),
// performs the op at i16, and truncates the result back to i8.
// Case labels for the individual opcodes are elided in this view.
2193 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2195 assert(Op.getValueType() == MVT::i8);
2198 assert(0 && "Unhandled i8 math operator");
2202 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Both operands are sign-extended for subtraction.
2204 SDOperand N1 = Op.getOperand(1);
2205 N0 = (N0.getOpcode() != ISD::Constant
2206 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2207 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2208 N1 = (N1.getOpcode() != ISD::Constant
2209 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2210 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2211 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2212 DAG.getNode(Opc, MVT::i16, N0, N1));
// This arm replicates the i8 value into both halves of an i16
// (N0 | (N0 << 8)) before applying Opc -- presumably so a rotate at i16
// behaves like an i8 rotate; confirm against the elided case label.
2216 SDOperand N1 = Op.getOperand(1);
2218 N0 = (N0.getOpcode() != ISD::Constant
2219 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2220 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
// The shift/rotate amount may already be wider than i16; truncate then.
2221 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2222 N1 = (N1.getOpcode() != ISD::Constant
2223 ? DAG.getNode(N1Opc, MVT::i16, N1)
2224 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2225 SDOperand ExpandArg =
2226 DAG.getNode(ISD::OR, MVT::i16, N0,
2227 DAG.getNode(ISD::SHL, MVT::i16,
2228 N0, DAG.getConstant(8, MVT::i16)));
2229 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2230 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Zero-extending arm (logical operation: value and amount both unsigned).
2234 SDOperand N1 = Op.getOperand(1);
2236 N0 = (N0.getOpcode() != ISD::Constant
2237 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2238 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2239 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2240 N1 = (N1.getOpcode() != ISD::Constant
2241 ? DAG.getNode(N1Opc, MVT::i16, N1)
2242 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2243 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2244 DAG.getNode(Opc, MVT::i16, N0, N1));
// Sign-extending arm (arithmetic operation on the value).
2247 SDOperand N1 = Op.getOperand(1);
2249 N0 = (N0.getOpcode() != ISD::Constant
2250 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2251 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2252 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2253 N1 = (N1.getOpcode() != ISD::Constant
2254 ? DAG.getNode(N1Opc, MVT::i16, N1)
2255 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2256 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2257 DAG.getNode(Opc, MVT::i16, N0, N1));
// Second sign-extending arm (same promote/op/truncate pattern).
2260 SDOperand N1 = Op.getOperand(1);
2262 N0 = (N0.getOpcode() != ISD::Constant
2263 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2264 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2265 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2266 N1 = (N1.getOpcode() != ISD::Constant
2267 ? DAG.getNode(N1Opc, MVT::i16, N1)
2268 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2269 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2270 DAG.getNode(Opc, MVT::i16, N0, N1));
2278 static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
// Custom-lower i64 operations by promoting the scalar into a 128-bit
// vector register (PROMOTE_SCALAR), operating on the whole quadword with
// SPU-specific nodes, and extracting the preferred slot back out.
2280 MVT VT = Op.getValueType();
2281 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2283 SDOperand Op0 = Op.getOperand(0);
// --- zero/sign/any extend of i32 to i64 ---
2286 case ISD::ZERO_EXTEND:
2287 case ISD::SIGN_EXTEND:
2288 case ISD::ANY_EXTEND: {
2289 MVT Op0VT = Op0.getValueType();
2290 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2292 assert(Op0VT == MVT::i32
2293 && "CellSPU: Zero/sign extending something other than i32");
2294 DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
// Rotating the quadword right by 4 bytes moves the i32 into the upper
// half of the i64 slot; the sign-propagating rotate handles SIGN_EXTEND,
// the zero-fill rotate handles ZERO/ANY_EXTEND.
2296 unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2297 ? SPUISD::ROTBYTES_RIGHT_S
2298 : SPUISD::ROTQUAD_RZ_BYTES);
2299 SDOperand PromoteScalar =
2300 DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2302 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2303 DAG.getNode(ISD::BIT_CONVERT, VecVT,
2304 DAG.getNode(NewOpc, Op0VecVT,
2306 DAG.getConstant(4, MVT::i32))));
// --- i64 ADD: generate the per-word carry, shuffle it into the next
// word slot, then do a carry-propagating add (ADD_EXTENDED) ---
2310 // Turn operands into vectors to satisfy type checking (shufb works on
2313 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2315 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2316 SmallVector<SDOperand, 16> ShufBytes;
2318 // Create the shuffle mask for "rotating" the borrow up one register slot
2319 // once the borrow is generated.
// 0x80 selector bytes make shufb emit 0x00 in those positions
// (zero-fill), so only the carry word survives the rotation --
// presumably per the shufb special-byte encodings; confirm vs. SPU ISA.
2320 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2321 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2322 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2323 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2325 SDOperand CarryGen =
2326 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2327 SDOperand ShiftedCarry =
2328 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2330 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2331 &ShufBytes[0], ShufBytes.size()));
2333 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2334 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2335 Op0, Op1, ShiftedCarry));
// --- i64 SUB: same structure with borrow generation; note the filler
// selector bytes are 0xc0 here (shufb emits 0xFF for those) rather than
// 0x80 -- presumably because a borrow must fill with ones; confirm.
2339 // Turn operands into vectors to satisfy type checking (shufb works on
2342 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2344 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2345 SmallVector<SDOperand, 16> ShufBytes;
2347 // Create the shuffle mask for "rotating" the borrow up one register slot
2348 // once the borrow is generated.
2349 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2350 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2351 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2352 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2354 SDOperand BorrowGen =
2355 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2356 SDOperand ShiftedBorrow =
2357 DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2358 BorrowGen, BorrowGen,
2359 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2360 &ShufBytes[0], ShufBytes.size()));
2362 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2363 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2364 Op0, Op1, ShiftedBorrow));
// --- i64 SHL: mask off the neighboring element, then shift the quadword
// by (amt / 8) bytes and (amt % 8) bits ---
2368 SDOperand ShiftAmt = Op.getOperand(1);
2369 MVT ShiftAmtVT = ShiftAmt.getValueType();
2370 SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
// Zero the non-preferred half so shifted-in garbage can't leak into the
// result (0xff00 mask selects the upper 8 bytes).
2371 SDOperand MaskLower =
2372 DAG.getNode(SPUISD::SELB, VecVT,
2374 DAG.getConstant(0, VecVT),
2375 DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2376 DAG.getConstant(0xff00ULL, MVT::i16)));
// Split the shift amount: whole bytes (amt >> 3) and residual bits
// (amt & 7), matching the shlqby/shlqbi instruction pair.
2377 SDOperand ShiftAmtBytes =
2378 DAG.getNode(ISD::SRL, ShiftAmtVT,
2380 DAG.getConstant(3, ShiftAmtVT));
2381 SDOperand ShiftAmtBits =
2382 DAG.getNode(ISD::AND, ShiftAmtVT,
2384 DAG.getConstant(7, ShiftAmtVT));
2386 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2387 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2388 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2389 MaskLower, ShiftAmtBytes),
// --- i64 SRL: rotate the quadword right with zero fill, again split
// into byte and bit parts ---
2394 MVT VT = Op.getValueType();
2395 SDOperand ShiftAmt = Op.getOperand(1);
2396 MVT ShiftAmtVT = ShiftAmt.getValueType();
2397 SDOperand ShiftAmtBytes =
2398 DAG.getNode(ISD::SRL, ShiftAmtVT,
2400 DAG.getConstant(3, ShiftAmtVT));
2401 SDOperand ShiftAmtBits =
2402 DAG.getNode(ISD::AND, ShiftAmtVT,
2404 DAG.getConstant(7, ShiftAmtVT));
2406 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2407 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2408 Op0, ShiftAmtBytes),
// --- i64 SRA: SPU only rotates left, so right-shift is a left-rotate by
// the negated amount, with the vacated upper half filled from a
// sign-replicating select mask ---
2413 // Promote Op0 to vector
2415 DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2416 SDOperand ShiftAmt = Op.getOperand(1);
2417 MVT ShiftVT = ShiftAmt.getValueType();
2419 // Negate variable shift amounts
2420 if (!isa<ConstantSDNode>(ShiftAmt)) {
2421 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2422 DAG.getConstant(0, ShiftVT), ShiftAmt);
// Broadcast the sign bit of the upper word (arithmetic shift by 31).
2425 SDOperand UpperHalfSign =
2426 DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2427 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2428 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2429 Op0, DAG.getConstant(31, MVT::i32))));
2430 SDOperand UpperHalfSignMask =
2431 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2432 SDOperand UpperLowerMask =
2433 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2434 DAG.getConstant(0xff00, MVT::i16));
// Keep the original upper half, pre-fill the lower half with sign bits.
2435 SDOperand UpperLowerSelect =
2436 DAG.getNode(SPUISD::SELB, MVT::v2i64,
2437 UpperHalfSignMask, Op0, UpperLowerMask);
2438 SDOperand RotateLeftBytes =
2439 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2440 UpperLowerSelect, ShiftAmt);
2441 SDOperand RotateLeftBits =
2442 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2443 RotateLeftBytes, ShiftAmt);
2445 return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2453 //! Lower byte immediate operations for v16i8 vectors:
// If one operand of a v16i8 logical op is a splatted constant
// BUILD_VECTOR (possibly behind a BIT_CONVERT), rewrite the op to use a
// vector of target constants so it can match the byte-immediate
// instruction forms (andbi/orbi/xorbi).
2455 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2458 MVT VT = Op.getValueType();
// First guess: constant on the left, value on the right.
2460 ConstVec = Op.getOperand(0);
2461 Arg = Op.getOperand(1);
2462 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2463 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2464 ConstVec = ConstVec.getOperand(0);
// Otherwise try the swapped assignment (constant on the right).
2466 ConstVec = Op.getOperand(1);
2467 Arg = Op.getOperand(0);
2468 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2469 ConstVec = ConstVec.getOperand(0);
2474 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2475 uint64_t VectorBits[2];
2476 uint64_t UndefBits[2];
2477 uint64_t SplatBits, SplatUndef;
// Only rewrite when the build vector is a uniform splat of one element.
2480 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2481 && isConstantSplat(VectorBits, UndefBits,
2482 VT.getVectorElementType().getSizeInBits(),
2483 SplatBits, SplatUndef, SplatSize)) {
2484 SDOperand tcVec[16];
// Only the low 8 bits matter for a byte-immediate form.
2485 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2486 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2488 // Turn the BUILD_VECTOR into a set of target constants:
2489 for (size_t i = 0; i < tcVecSize; ++i)
// Re-emit the same logical opcode with the target-constant vector.
2492 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2493 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2500 //! Lower i32 multiplication
// Scalar i32 MUL lowered the same way as the v4i32 vector case in
// LowerVectorMUL: sum of the two mpyh cross terms plus the mpyu
// low-half product.
2501 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, MVT VT,
2503 switch (VT.getSimpleVT()) {
// Unexpected type: report it (abort path elided here).
2505 cerr << "CellSPU: Unknown LowerMUL value type, got "
2506 << Op.getValueType().getMVTString()
2512 SDOperand rA = Op.getOperand(0);
2513 SDOperand rB = Op.getOperand(1);
// (mpyh(rA,rB) + mpyh(rB,rA)) + mpyu(rA,rB) == low 32 bits of rA*rB.
2515 return DAG.getNode(ISD::ADD, MVT::i32,
2516 DAG.getNode(ISD::ADD, MVT::i32,
2517 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2518 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2519 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2526 //! Custom lowering for CTPOP (count population)
2528 Custom lowering code that counts the number ones in the input
2529 operand. SPU has such an instruction, but it counts the number of
2530 ones per byte, which then have to be accumulated.
2532 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
// Strategy per width: promote the scalar into a vector, run SPUISD::CNTB
// (per-byte popcount), then fold the byte counts together with
// shift-and-add and mask to the final count.
2533 MVT VT = Op.getValueType();
2534 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2536 switch (VT.getSimpleVT()) {
2538 assert(false && "Invalid value type!");
// i8: a single CNTB byte already holds the answer.
2540 SDOperand N = Op.getOperand(0);
2541 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2543 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2544 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2546 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16: two byte counts; add high byte (via >>8) to low byte, mask to 0x0f.
2550 MachineFunction &MF = DAG.getMachineFunction();
2551 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2553 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2555 SDOperand N = Op.getOperand(0);
2556 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2557 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2558 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2560 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2561 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2563 // CNTB_result becomes the chain to which all of the virtual registers
2564 // CNTB_reg, SUM1_reg become associated:
2565 SDOperand CNTB_result =
2566 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
// Pin the CNTB result in a register so both uses below read one copy.
2568 SDOperand CNTB_rescopy =
2569 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2571 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2573 return DAG.getNode(ISD::AND, MVT::i16,
2574 DAG.getNode(ISD::ADD, MVT::i16,
2575 DAG.getNode(ISD::SRL, MVT::i16,
// i32: four byte counts; two rounds of shift-and-add (>>16 then >>8),
// then mask to 0xff.
2582 MachineFunction &MF = DAG.getMachineFunction();
2583 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2585 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2586 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2588 SDOperand N = Op.getOperand(0);
2589 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2590 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2591 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2592 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2594 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2595 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2597 // CNTB_result becomes the chain to which all of the virtual registers
2598 // CNTB_reg, SUM1_reg become associated:
2599 SDOperand CNTB_result =
2600 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2602 SDOperand CNTB_rescopy =
2603 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// Round 1: fold the upper 16 bits onto the lower 16.
2606 DAG.getNode(ISD::SRL, MVT::i32,
2607 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2610 DAG.getNode(ISD::ADD, MVT::i32,
2611 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2613 SDOperand Sum1_rescopy =
2614 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Round 2: fold the remaining upper byte onto the low byte.
2617 DAG.getNode(ISD::SRL, MVT::i32,
2618 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2621 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2622 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
// Only the low byte holds the meaningful total (max 32 for i32).
2624 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2634 /// LowerOperation - Provide custom lowering hooks for some operations.
2637 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
// Central dispatch for all opcodes this target marked as Custom:
// routes each node to the matching Lower* helper; the default arm
// (partially elided) reports unhandled opcodes to cerr.
2639 unsigned Opc = (unsigned) Op.getOpcode();
2640 MVT VT = Op.getValueType();
// Unhandled opcode: emit diagnostics (fall-through behavior elided).
2644 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2645 cerr << "Op.getOpcode() = " << Opc << "\n";
2646 cerr << "*Op.Val:\n";
// Memory and address-forming nodes:
2653 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2655 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2656 case ISD::ConstantPool:
2657 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2658 case ISD::GlobalAddress:
2659 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2660 case ISD::JumpTable:
2661 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2663 return LowerConstant(Op, DAG);
2664 case ISD::ConstantFP:
2665 return LowerConstantFP(Op, DAG);
2667 return LowerBRCOND(Op, DAG);
// Calling-convention nodes:
2668 case ISD::FORMAL_ARGUMENTS:
2669 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2671 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2673 return LowerRET(Op, DAG, getTargetMachine());
2676 // i8, i64 math ops:
2677 case ISD::ZERO_EXTEND:
2678 case ISD::SIGN_EXTEND:
2679 case ISD::ANY_EXTEND:
// Width decides the helper: i8 promotes to i16, i64 goes through the
// quadword-vector path.
2688 return LowerI8Math(Op, DAG, Opc);
2689 else if (VT == MVT::i64)
2690 return LowerI64Math(Op, DAG, Opc);
2694 // Vector-related lowering.
2695 case ISD::BUILD_VECTOR:
2696 return LowerBUILD_VECTOR(Op, DAG);
2697 case ISD::SCALAR_TO_VECTOR:
2698 return LowerSCALAR_TO_VECTOR(Op, DAG);
2699 case ISD::VECTOR_SHUFFLE:
2700 return LowerVECTOR_SHUFFLE(Op, DAG);
2701 case ISD::EXTRACT_VECTOR_ELT:
2702 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2703 case ISD::INSERT_VECTOR_ELT:
2704 return LowerINSERT_VECTOR_ELT(Op, DAG);
2706 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2710 return LowerByteImmed(Op, DAG);
2712 // Vector and i8 multiply:
// MUL dispatch: vector -> LowerVectorMUL, i8 -> promote path, else i32.
2715 return LowerVectorMUL(Op, DAG);
2716 else if (VT == MVT::i8)
2717 return LowerI8Math(Op, DAG, Opc);
2719 return LowerMUL(Op, DAG, VT, Opc);
// FDIV: only the f32 reciprocal-estimate path exists; f64 is still TODO.
2722 if (VT == MVT::f32 || VT == MVT::v4f32)
2723 return LowerFDIVf32(Op, DAG);
2724 // else if (Op.getValueType() == MVT::f64)
2725 // return LowerFDIVf64(Op, DAG);
2727 assert(0 && "Calling FDIV on unsupported MVT");
2730 return LowerCTPOP(Op, DAG);
2736 //===----------------------------------------------------------------------===//
2737 // Target Optimization Hooks
2738 //===----------------------------------------------------------------------===//
2741 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
// Target-specific DAG combines: constant-fold SPUindirect address
// arithmetic, drop redundant extends/promotes of EXTRACT_ELT0, and kill
// degenerate (zero-amount) quadword shifts/rotates. Returns the
// replacement node, or an empty Result to leave N unchanged.
2744 TargetMachine &TM = getTargetMachine();
2746 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2747 SelectionDAG &DAG = DCI.DAG;
2748 SDOperand Op0 = N->getOperand(0); // everything has at least one operand
2749 SDOperand Result; // Initially, NULL result
2751 switch (N->getOpcode()) {
// --- ISD::ADD combines (case label elided) ---
2754 SDOperand Op1 = N->getOperand(1);
// Fold a constant addend into an existing SPUindirect offset
// (constant on the right, indirect on the left).
2756 if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2757 SDOperand Op01 = Op0.getOperand(1);
2758 if (Op01.getOpcode() == ISD::Constant
2759 || Op01.getOpcode() == ISD::TargetConstant) {
2760 // (add <const>, (SPUindirect <arg>, <const>)) ->
2761 // (SPUindirect <arg>, <const + const>)
2762 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2763 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2764 SDOperand combinedConst =
2765 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2766 Op0.getValueType());
2768 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2769 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2770 DEBUG(cerr << "With: (SPUindirect <arg>, "
2771 << CN0->getValue() + CN1->getValue() << ")\n");
2772 return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2773 Op0.getOperand(0), combinedConst);
// Mirror case: constant on the left, indirect on the right.
2775 } else if (isa<ConstantSDNode>(Op0)
2776 && Op1.getOpcode() == SPUISD::IndirectAddr) {
2777 SDOperand Op11 = Op1.getOperand(1);
2778 if (Op11.getOpcode() == ISD::Constant
2779 || Op11.getOpcode() == ISD::TargetConstant) {
2780 // (add (SPUindirect <arg>, <const>), <const>) ->
2781 // (SPUindirect <arg>, <const + const>)
2782 ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2783 ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2784 SDOperand combinedConst =
2785 DAG.getConstant(CN0->getValue() + CN1->getValue(),
2786 Op0.getValueType());
2788 DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2789 << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2790 DEBUG(cerr << "With: (SPUindirect <arg>, "
2791 << CN0->getValue() + CN1->getValue() << ")\n");
2793 return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2794 Op1.getOperand(0), combinedConst);
2799 case ISD::SIGN_EXTEND:
2800 case ISD::ZERO_EXTEND:
2801 case ISD::ANY_EXTEND: {
// An extend of EXTRACT_ELT0 to the same type is a no-op; forward Op0.
2802 if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
2803 N->getValueType(0) == Op0.getValueType()) {
2804 // (any_extend (SPUextract_elt0 <arg>)) ->
2805 // (SPUextract_elt0 <arg>)
2806 // Types must match, however...
2807 DEBUG(cerr << "Replace: ");
2808 DEBUG(N->dump(&DAG));
2809 DEBUG(cerr << "\nWith: ");
2810 DEBUG(Op0.Val->dump(&DAG));
2811 DEBUG(cerr << "\n");
2817 case SPUISD::IndirectAddr: {
// A zero-offset indirect through an A-form address collapses to the
// A-form address itself (only without large-memory mode).
2818 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2819 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
2820 if (CN->getValue() == 0) {
2821 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2822 // (SPUaform <addr>, 0)
2824 DEBUG(cerr << "Replace: ");
2825 DEBUG(N->dump(&DAG));
2826 DEBUG(cerr << "\nWith: ");
2827 DEBUG(Op0.Val->dump(&DAG));
2828 DEBUG(cerr << "\n");
2835 case SPUISD::SHLQUAD_L_BITS:
2836 case SPUISD::SHLQUAD_L_BYTES:
2837 case SPUISD::VEC_SHL:
2838 case SPUISD::VEC_SRL:
2839 case SPUISD::VEC_SRA:
2840 case SPUISD::ROTQUAD_RZ_BYTES:
2841 case SPUISD::ROTQUAD_RZ_BITS: {
2842 SDOperand Op1 = N->getOperand(1);
// A shift/rotate by constant zero is the identity; drop the node.
2844 if (isa<ConstantSDNode>(Op1)) {
2845 // Kill degenerate vector shifts:
2846 ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
2848 if (CN->getValue() == 0) {
2854 case SPUISD::PROMOTE_SCALAR: {
2855 switch (Op0.getOpcode()) {
2858 case ISD::ANY_EXTEND:
2859 case ISD::ZERO_EXTEND:
2860 case ISD::SIGN_EXTEND: {
2861 // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
2863 // but only if the SPUpromote_scalar and <arg> types match.
2864 SDOperand Op00 = Op0.getOperand(0);
2865 if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
2866 SDOperand Op000 = Op00.getOperand(0);
2867 if (Op000.getValueType() == N->getValueType(0)) {
2873 case SPUISD::EXTRACT_ELT0: {
2874 // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
// Promote of an extract cancels out; forward the original vector.
2876 Result = Op0.getOperand(0);
2883 // Otherwise, return unchanged.
2886 DEBUG(cerr << "\nReplace.SPU: ");
2887 DEBUG(N->dump(&DAG));
2888 DEBUG(cerr << "\nWith: ");
2889 DEBUG(Result.Val->dump(&DAG));
2890 DEBUG(cerr << "\n");
2897 //===----------------------------------------------------------------------===//
2898 // Inline Assembly Support
2899 //===----------------------------------------------------------------------===//
2901 /// getConstraintType - Given a constraint letter, return the type of
2902 /// constraint it is for this target.
2903 SPUTargetLowering::ConstraintType
2904 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
// Classify single-letter inline-asm constraints: the letters handled in
// the (elided) case labels map to register classes; everything else is
// delegated to the generic TargetLowering implementation.
2905 if (ConstraintLetter.size() == 1) {
2906 switch (ConstraintLetter[0]) {
2913 return C_RegisterClass;
2916 return TargetLowering::getConstraintType(ConstraintLetter);
2919 std::pair<unsigned, const TargetRegisterClass*>
2920 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// Map single-letter inline-asm constraints to SPU register classes
// (case labels elided): 64/32-bit integer classes, f32/f64 FP classes,
// and the general-purpose class; anything else falls back to the base.
2923 if (Constraint.size() == 1) {
2924 // Constraint letters (modeled on the GCC RS6000/PowerPC set)
2925 switch (Constraint[0]) {
2929 return std::make_pair(0U, SPU::R64CRegisterClass);
2930 return std::make_pair(0U, SPU::R32CRegisterClass);
2933 return std::make_pair(0U, SPU::R32FPRegisterClass);
2934 else if (VT == MVT::f64)
2935 return std::make_pair(0U, SPU::R64FPRegisterClass);
2938 return std::make_pair(0U, SPU::GPRCRegisterClass);
2942 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2945 //! Compute used/known bits for a SPU operand
2947 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2951                                                   const SelectionDAG &DAG,
2952                                                   unsigned Depth ) const {
// Report known-bit information for SPU-specific nodes so the generic
// combiner can simplify around them. Most opcodes are listed but provide
// no information (their case bodies are empty / elided).
2954 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
2957 switch (Op.getOpcode()) {
2959 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2969 case SPUISD::PROMOTE_SCALAR: {
// Bits outside the promoted scalar's width are known zero.
// NOTE(review): "KnownOne |= InMask" claims every in-range bit is a
// known ONE, which cannot be right for an arbitrary promoted value --
// verify against how callers consume KnownOne here.
2970 SDOperand Op0 = Op.getOperand(0);
2971 MVT Op0VT = Op0.getValueType();
2972 unsigned Op0VTBits = Op0VT.getSizeInBits();
2973 uint64_t InMask = Op0VT.getIntegerVTBitMask();
2974 KnownZero |= APInt(Op0VTBits, ~InMask, false);
2975 KnownOne |= APInt(Op0VTBits, InMask, false);
2979 case SPUISD::LDRESULT:
2980 case SPUISD::EXTRACT_ELT0:
2981 case SPUISD::EXTRACT_ELT0_CHAINED: {
// Same mask-based pattern as PROMOTE_SCALAR, keyed on the result type
// (same NOTE(review) applies to the KnownOne line).
2982 MVT OpVT = Op.getValueType();
2983 unsigned OpVTBits = OpVT.getSizeInBits();
2984 uint64_t InMask = OpVT.getIntegerVTBitMask();
2985 KnownZero |= APInt(OpVTBits, ~InMask, false);
2986 KnownOne |= APInt(OpVTBits, InMask, false);
// The remaining SPU nodes contribute no known-bit information.
2991   case EXTRACT_I1_ZEXT:
2992   case EXTRACT_I1_SEXT:
2993   case EXTRACT_I8_ZEXT:
2994   case EXTRACT_I8_SEXT:
2999   case SPUISD::SHLQUAD_L_BITS:
3000   case SPUISD::SHLQUAD_L_BYTES:
3001   case SPUISD::VEC_SHL:
3002   case SPUISD::VEC_SRL:
3003   case SPUISD::VEC_SRA:
3004   case SPUISD::VEC_ROTL:
3005   case SPUISD::VEC_ROTR:
3006   case SPUISD::ROTQUAD_RZ_BYTES:
3007   case SPUISD::ROTQUAD_RZ_BITS:
3008   case SPUISD::ROTBYTES_RIGHT_S:
3009   case SPUISD::ROTBYTES_LEFT:
3010   case SPUISD::ROTBYTES_LEFT_CHAINED:
3011   case SPUISD::SELECT_MASK:
3013   case SPUISD::FPInterp:
3014   case SPUISD::FPRecipEst:
3015   case SPUISD::SEXT32TO64:
3020 // LowerAsmOperandForConstraint
3022 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
3023                                                 char ConstraintLetter,
3024                                                 std::vector<SDOperand> &Ops,
3025                                                 SelectionDAG &DAG) const {
// No SPU-specific inline-asm operand constraints yet: simply delegate
// to the generic TargetLowering handling.
3026   // Default, for the time being, to the base class handler
3027   TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
3030 /// isLegalAddressImmediate - Return true if the integer value can be used
3031 /// as the offset of the target addressing mode.
3032 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
3033   // SPU's addresses are 256K:
// Accept offsets strictly inside (-2^18, 2^18 - 1); the type argument is
// ignored -- the same range applies to every access width.
3034   return (V > -(1 << 18) && V < (1 << 18) - 1);
3037 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {