//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  // Used in getTargetNodeName() below
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
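
  // Worked example (illustrative, not from the original source): an i16
  // element at byte offset 6 of its 16-byte quadword has prefslot_byte == 2,
  // so LowerLOAD below computes a left-rotate of (6 & 0xf) - 2 == 4 bytes to
  // bring the halfword into its preferred slot.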
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getMVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }
  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  static SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, CallingConv::C, false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
} // end anonymous namespace
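
// Illustrative usage (hypothetical, not from the original file): a custom
// lowering hook could expand an f64 division into a __fast_divdf3 libcall
// roughly as follows, where 'Dummy' absorbs the unused Hi out-parameter:
//
//   SDValue Dummy;
//   SDValue Lowered = ExpandLibCall(RTLIB::DIV_F64, Op, DAG,
//                                   /*isSigned=*/false, Dummy, *this);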
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::UDIV, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIV, MVT::i64, Expand);
  setOperationAction(ISD::UDIV, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SREM, MVT::i128, Expand);
  setOperationAction(ISD::UREM, MVT::i128, Expand);
  setOperationAction(ISD::SDIV, MVT::i128, Expand);
  setOperationAction(ISD::UDIV, MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // instruction where available)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  // the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);
  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
  setOperationAction(ISD::MULHU, MVT::i8, Expand);
  setOperationAction(ISD::MULHS, MVT::i8, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU, MVT::i16, Expand);
  setOperationAction(ISD::MULHS, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  setOperationAction(ISD::MULHS, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i64, Expand);
  setOperationAction(ISD::MULHS, MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);
  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8, Expand);
  setOperationAction(ISD::CTTZ , MVT::i16, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8, Promote);
  setOperationAction(ISD::CTLZ , MVT::i16, Promote);
  setOperationAction(ISD::CTLZ , MVT::i32, Legal);
  setOperationAction(ISD::CTLZ , MVT::i64, Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);
  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8, Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8, Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
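
  // For illustration (not part of the original comments), the generic
  // expansion builds the i64 from two i32 halves with shifts and ors:
  //   (build_pair lo, hi) -> (or (shl (anyext hi), 32), (zext lo))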
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is legal for the supported vector types as well.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND, VT, Legal);
    setOperationAction(ISD::OR, VT, Legal);
    setOperationAction(ISD::XOR, VT, Legal);
    setOperationAction(ISD::LOAD, VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE, VT, Legal);
    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
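// A sketch of the DAG this produces (mirroring the load case above;
// illustrative, not verbatim from the original comments), e.g. for an
// i32 element store:
//
//   %1 v16i8,ch = load <16-byte block containing the target>
//   %2 v16i8    = shuffle_mask <insertion offset>
//   %3 v4i32    = scalar_to_vector %value
//   %4 v16i8    = shufb %3, %1, %2
//   %5 ch       = store %4, <16-byte block>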
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();
  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#ifndef NDEBUG
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif
    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));
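
    // SHUFB selects each result byte from either input by its control byte
    // (0x00-0x0f picks from the first operand, 0x10-0x1f from the second),
    // so the SHUFFLE_MASK above routes the scalar's bytes into the loaded
    // block at the insertion offset and passes the other bytes through.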
    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        unsigned CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1032 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1033 MVT ObjectVT = Ins[ArgNo].VT;
1034 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1037 if (ArgRegIdx < NumArgRegs) {
1038 const TargetRegisterClass *ArgRegClass;
1040 switch (ObjectVT.getSimpleVT()) {
1043 raw_string_ostream Msg(msg);
1044 Msg << "LowerFormalArguments Unhandled argument type: "
1045 << ObjectVT.getMVTString();
1046 llvm_report_error(Msg.str());
1049 ArgRegClass = &SPU::R8CRegClass;
1052 ArgRegClass = &SPU::R16CRegClass;
1055 ArgRegClass = &SPU::R32CRegClass;
1058 ArgRegClass = &SPU::R64CRegClass;
1061 ArgRegClass = &SPU::GPRCRegClass;
1064 ArgRegClass = &SPU::R32FPRegClass;
1067 ArgRegClass = &SPU::R64FPRegClass;
1075 ArgRegClass = &SPU::VECREGRegClass;
      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }
  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0; // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
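
// Worked example (illustrative, not from the original comments): for
// Op = 0x1fffc both checks pass (word-aligned, and bit 17 is clear so the
// value survives the sign-extension test), yielding the word immediate
// 0x1fffc >> 2 = 0x7fff; Op = 0x20000 fails the sign-extension test and
// returns null.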
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             unsigned CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Outs[i].Val;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;
  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);

  InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT()) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               unsigned CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Use the full 64-bit value; truncating to unsigned would drop the
    // upper half of the constant.
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue(); // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();
  switch (VT.getSimpleVT()) {
  default: {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
        << VT.getMVTString();
    llvm_report_error(Msg.str());
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
SDValue
SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
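
    // (For reference: in shufb's control word, a byte of the form 10xxxxxx
    // produces 0x00, 110xxxxx produces 0xff, and 111xxxxx produces 0x80,
    // which is why 0, 0xffffffff and 0x80000000 halves can be synthesized
    // from the mask itself instead of a second input vector.)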
    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }

    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;
    for (int i = 0; i < 4; ++i) {
      unsigned val = 0;
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;

        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else {
          val |= i * 4 + j + ((i & 1) * 16);
        }
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }

    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
}
1730 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1731 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1732 /// permutation vector, V3, is monotonically increasing with one "exception"
1733 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1734 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1735 /// In either case, the net result is going to eventually invoke SHUFB to
1736 /// permute/shuffle the bytes from V1 and V2.
1738 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1739 /// control word for byte/halfword/word insertion. This takes care of a single
1740 /// element move from V2 into V1.
1742 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
1743 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1744 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1745 SDValue V1 = Op.getOperand(0);
1746 SDValue V2 = Op.getOperand(1);
1747 DebugLoc dl = Op.getDebugLoc();
1749 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1751 // If we have a single element being moved from V1 to V2, this can be handled
1752 // using the C*[DX] compute mask instructions, but the vector elements have
1753 // to be monotonically increasing with one exception element.
1754 MVT VecVT = V1.getValueType();
1755 MVT EltVT = VecVT.getVectorElementType();
1756 unsigned EltsFromV2 = 0;
1758 unsigned V2EltIdx0 = 0;
1759 unsigned CurrElt = 0;
1760 unsigned MaxElts = VecVT.getVectorNumElements();
1761 unsigned PrevElt = 0;
1763 bool monotonic = true;
1766 if (EltVT == MVT::i8) {
1768 } else if (EltVT == MVT::i16) {
1770 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1772 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1775 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1777 for (unsigned i = 0; i != MaxElts; ++i) {
1778 if (SVN->getMaskElt(i) < 0)
1781 unsigned SrcElt = SVN->getMaskElt(i);
1784 if (SrcElt >= V2EltIdx0) {
1785 if (1 >= (++EltsFromV2)) {
1786 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1788 } else if (CurrElt != SrcElt) {
1796 if (PrevElt > 0 && SrcElt < MaxElts) {
1797 if ((PrevElt == SrcElt - 1)
1798 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1805 } else if (PrevElt == 0) {
1806 // First time through, need to keep track of previous element
1809 // This isn't a rotation, takes elements from vector 2
1815 if (EltsFromV2 == 1 && monotonic) {
1816 // Compute mask and shuffle
1817 MachineFunction &MF = DAG.getMachineFunction();
1818 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1819 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1820 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1821 // Initialize temporary register to 0
1822 SDValue InitTempReg =
1823 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1824 // Copy register's contents as index in SHUFFLE_MASK:
1825 SDValue ShufMaskOp =
1826 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1827 DAG.getTargetConstant(V2Elt, MVT::i32),
1828 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1829 // Use shuffle mask in SHUFB synthetic instruction:
1830 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1832 } else if (rotate) {
1833 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1835 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1836 V1, DAG.getConstant(rotamt, MVT::i16));
1838 // Convert the SHUFFLE_VECTOR mask's input element units to the
1840 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1842 SmallVector<SDValue, 16> ResultMask;
1843 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1844 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1846 for (unsigned j = 0; j < BytesPerElement; ++j)
1847 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1850 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1851 &ResultMask[0], ResultMask.size());
1852 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1856 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1857 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1858 DebugLoc dl = Op.getDebugLoc();
1860 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1861 // For a constant, build the appropriate constant vector, which will
1862 // eventually simplify to a vector register load.
1864 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1865 SmallVector<SDValue, 16> ConstVecValues;
1869 // Create a constant vector:
1870 switch (Op.getValueType().getSimpleVT()) {
1871 default: llvm_unreachable("Unexpected constant value type in "
1872 "LowerSCALAR_TO_VECTOR");
1873 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1874 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1875 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1876 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1877 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1878 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1881 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1882 for (size_t j = 0; j < n_copies; ++j)
1883 ConstVecValues.push_back(CValue);
1885 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1886 &ConstVecValues[0], ConstVecValues.size());
1888 // Otherwise, copy the value from one register to another:
1889 switch (Op0.getValueType().getSimpleVT()) {
1890 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1897 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1904 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1905 MVT VT = Op.getValueType();
1906 SDValue N = Op.getOperand(0);
1907 SDValue Elt = Op.getOperand(1);
1908 DebugLoc dl = Op.getDebugLoc();
1911 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1912 // Constant argument:
1913 int EltNo = (int) C->getZExtValue();
1916 if (VT == MVT::i8 && EltNo >= 16)
1917 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1918 else if (VT == MVT::i16 && EltNo >= 8)
1919 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1920 else if (VT == MVT::i32 && EltNo >= 4)
1921 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1922 else if (VT == MVT::i64 && EltNo >= 2)
1923 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1925 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1926 // i32 and i64: Element 0 is the preferred slot
1927 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1930 // Need to generate shuffle mask and extract:
1931 int prefslot_begin = -1, prefslot_end = -1;
1932 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1934 switch (VT.getSimpleVT()) {
1936 assert(false && "Invalid value type!");
1938 prefslot_begin = prefslot_end = 3;
1942 prefslot_begin = 2; prefslot_end = 3;
1947 prefslot_begin = 0; prefslot_end = 3;
1952 prefslot_begin = 0; prefslot_end = 7;
1957 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1958 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1960 unsigned int ShufBytes[16];
1961 for (int i = 0; i < 16; ++i) {
1962 // zero fill uppper part of preferred slot, don't care about the
1964 unsigned int mask_val;
1965 if (i <= prefslot_end) {
1967 ((i < prefslot_begin)
1969 : elt_byte + (i - prefslot_begin));
1971 ShufBytes[i] = mask_val;
1973 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1976 SDValue ShufMask[4];
1977 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1978 unsigned bidx = i * 4;
1979 unsigned int bits = ((ShufBytes[bidx] << 24) |
1980 (ShufBytes[bidx+1] << 16) |
1981 (ShufBytes[bidx+2] << 8) |
1983 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1986 SDValue ShufMaskVec =
1987 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1988 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1990 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1991 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1992 N, N, ShufMaskVec));
1994 // Variable index: Rotate the requested element into slot 0, then replicate
1995 // slot 0 across the vector
1996 MVT VecVT = N.getValueType();
1997 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1998 llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2002 // Make life easier by making sure the index is zero-extended to i32
2003 if (Elt.getValueType() != MVT::i32)
2004 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2006 // Scale the index to a bit/byte shift quantity
2008 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2009 unsigned scaleShift = scaleFactor.logBase2();
2012 if (scaleShift > 0) {
2013 // Scale the shift factor:
2014 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2015 DAG.getConstant(scaleShift, MVT::i32));
2018 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2020 // Replicate the bytes starting at byte 0 across the entire vector (for
2021 // consistency with the notion of a unified register set)
2024 switch (VT.getSimpleVT()) {
2026 llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2030 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2031 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2032 factor, factor, factor, factor);
2036 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2037 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2038 factor, factor, factor, factor);
2043 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2044 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2045 factor, factor, factor, factor);
2050 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2051 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2052 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2053 loFactor, hiFactor, loFactor, hiFactor);
2058 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2059 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2060 vecShift, vecShift, replicate));
2066 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2067 SDValue VecOp = Op.getOperand(0);
2068 SDValue ValOp = Op.getOperand(1);
2069 SDValue IdxOp = Op.getOperand(2);
2070 DebugLoc dl = Op.getDebugLoc();
2071 MVT VT = Op.getValueType();
2073 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2074 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2076 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2077 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2078 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2079 DAG.getRegister(SPU::R1, PtrVT),
2080 DAG.getConstant(CN->getSExtValue(), PtrVT));
2081 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2084 DAG.getNode(SPUISD::SHUFB, dl, VT,
2085 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2087 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2092 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2093 const TargetLowering &TLI)
2095 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2096 DebugLoc dl = Op.getDebugLoc();
2097 MVT ShiftVT = TLI.getShiftAmountTy();
2099 assert(Op.getValueType() == MVT::i8);
2102 llvm_unreachable("Unhandled i8 math operator");
2106 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2108 SDValue N1 = Op.getOperand(1);
2109 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2110 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2111 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2112 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2117 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2119 SDValue N1 = Op.getOperand(1);
2120 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2121 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2122 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2123 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2127 SDValue N1 = Op.getOperand(1);
2128 MVT N1VT = N1.getValueType();
2130 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2131 if (!N1VT.bitsEq(ShiftVT)) {
2132 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2135 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2138 // Replicate lower 8-bits into upper 8:
2140 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2141 DAG.getNode(ISD::SHL, dl, MVT::i16,
2142 N0, DAG.getConstant(8, MVT::i32)));
2144 // Truncate back down to i8
2145 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2146 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2150 SDValue N1 = Op.getOperand(1);
2151 MVT N1VT = N1.getValueType();
2153 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2154 if (!N1VT.bitsEq(ShiftVT)) {
2155 unsigned N1Opc = ISD::ZERO_EXTEND;
2157 if (N1.getValueType().bitsGT(ShiftVT))
2158 N1Opc = ISD::TRUNCATE;
2160 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2163 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2164 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2167 SDValue N1 = Op.getOperand(1);
2168 MVT N1VT = N1.getValueType();
2170 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2171 if (!N1VT.bitsEq(ShiftVT)) {
2172 unsigned N1Opc = ISD::SIGN_EXTEND;
2174 if (N1VT.bitsGT(ShiftVT))
2175 N1Opc = ISD::TRUNCATE;
2176 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2179 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2180 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2183 SDValue N1 = Op.getOperand(1);
2185 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2186 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2187 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2188 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2196 //! Lower byte immediate operations for v16i8 vectors:
2198 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2201 MVT VT = Op.getValueType();
2202 DebugLoc dl = Op.getDebugLoc();
2204 ConstVec = Op.getOperand(0);
2205 Arg = Op.getOperand(1);
2206 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2207 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2208 ConstVec = ConstVec.getOperand(0);
2210 ConstVec = Op.getOperand(1);
2211 Arg = Op.getOperand(0);
2212 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2213 ConstVec = ConstVec.getOperand(0);
2218 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2219 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2220 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2222 APInt APSplatBits, APSplatUndef;
2223 unsigned SplatBitSize;
2225 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2227 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2228 HasAnyUndefs, minSplatBits)
2229 && minSplatBits <= SplatBitSize) {
2230 uint64_t SplatBits = APSplatBits.getZExtValue();
2231 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2233 SmallVector<SDValue, 16> tcVec;
2234 tcVec.assign(16, tc);
2235 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2236 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2240 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2241 // lowered. Return the operation, rather than a null SDValue.
2245 //! Custom lowering for CTPOP (count population)
2247 Custom lowering code that counts the number ones in the input
2248 operand. SPU has such an instruction, but it counts the number of
2249 ones per byte, which then have to be accumulated.
2251 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2252 MVT VT = Op.getValueType();
2253 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2254 DebugLoc dl = Op.getDebugLoc();
2256 switch (VT.getSimpleVT()) {
2258 assert(false && "Invalid value type!");
2260 SDValue N = Op.getOperand(0);
2261 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2263 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2264 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2266 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2270 MachineFunction &MF = DAG.getMachineFunction();
2271 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2273 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2275 SDValue N = Op.getOperand(0);
2276 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2277 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2278 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2280 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2281 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2283 // CNTB_result becomes the chain to which all of the virtual registers
2284 // CNTB_reg, SUM1_reg become associated:
2285 SDValue CNTB_result =
2286 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2288 SDValue CNTB_rescopy =
2289 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2291 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2293 return DAG.getNode(ISD::AND, dl, MVT::i16,
2294 DAG.getNode(ISD::ADD, dl, MVT::i16,
2295 DAG.getNode(ISD::SRL, dl, MVT::i16,
2302 MachineFunction &MF = DAG.getMachineFunction();
2303 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2305 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2306 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2308 SDValue N = Op.getOperand(0);
2309 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2310 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2311 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2312 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2314 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2315 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2317 // CNTB_result becomes the chain to which all of the virtual registers
2318 // CNTB_reg, SUM1_reg become associated:
2319 SDValue CNTB_result =
2320 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2322 SDValue CNTB_rescopy =
2323 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2326 DAG.getNode(ISD::SRL, dl, MVT::i32,
2327 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2331 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2332 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2334 SDValue Sum1_rescopy =
2335 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2338 DAG.getNode(ISD::SRL, dl, MVT::i32,
2339 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2342 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2343 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2345 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2355 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2357 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2358 All conversions to i64 are expanded to a libcall.
2360 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2361 SPUTargetLowering &TLI) {
2362 MVT OpVT = Op.getValueType();
2363 SDValue Op0 = Op.getOperand(0);
2364 MVT Op0VT = Op0.getValueType();
2366 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2367 || OpVT == MVT::i64) {
2368 // Convert f32 / f64 to i32 / i64 via libcall.
2370 (Op.getOpcode() == ISD::FP_TO_SINT)
2371 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2372 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2373 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2375 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2381 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2383 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2384 All conversions from i64 are expanded to a libcall.
2386 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2387 SPUTargetLowering &TLI) {
2388 MVT OpVT = Op.getValueType();
2389 SDValue Op0 = Op.getOperand(0);
2390 MVT Op0VT = Op0.getValueType();
2392 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2393 || Op0VT == MVT::i64) {
2394 // Convert i32, i64 to f64 via libcall:
2396 (Op.getOpcode() == ISD::SINT_TO_FP)
2397 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2398 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2399 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2401 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2407 //! Lower ISD::SETCC
2409 This handles MVT::f64 (double floating point) condition lowering
2411 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2412 const TargetLowering &TLI) {
2413 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2414 DebugLoc dl = Op.getDebugLoc();
2415 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2417 SDValue lhs = Op.getOperand(0);
2418 SDValue rhs = Op.getOperand(1);
2419 MVT lhsVT = lhs.getValueType();
2420 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2422 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2423 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2424 MVT IntVT(MVT::i64);
2426 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2427 // selected to a NOP:
2428 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2430 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2431 DAG.getNode(ISD::SRL, dl, IntVT,
2432 i64lhs, DAG.getConstant(32, MVT::i32)));
2433 SDValue lhsHi32abs =
2434 DAG.getNode(ISD::AND, dl, MVT::i32,
2435 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2437 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2439 // SETO and SETUO only use the lhs operand:
2440 if (CC->get() == ISD::SETO) {
2441 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2443 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2444 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2445 DAG.getSetCC(dl, ccResultVT,
2446 lhs, DAG.getConstantFP(0.0, lhsVT),
2448 DAG.getConstant(ccResultAllOnes, ccResultVT));
2449 } else if (CC->get() == ISD::SETUO) {
2450 // Evaluates to true if Op0 is [SQ]NaN
2451 return DAG.getNode(ISD::AND, dl, ccResultVT,
2452 DAG.getSetCC(dl, ccResultVT,
2454 DAG.getConstant(0x7ff00000, MVT::i32),
2456 DAG.getSetCC(dl, ccResultVT,
2458 DAG.getConstant(0, MVT::i32),
2462 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2464 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2465 DAG.getNode(ISD::SRL, dl, IntVT,
2466 i64rhs, DAG.getConstant(32, MVT::i32)));
2468 // If a value is negative, subtract from the sign magnitude constant:
2469 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2471 // Convert the sign-magnitude representation into 2's complement:
2472 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2473 lhsHi32, DAG.getConstant(31, MVT::i32));
2474 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2476 DAG.getNode(ISD::SELECT, dl, IntVT,
2477 lhsSelectMask, lhsSignMag2TC, i64lhs);
2479 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2480 rhsHi32, DAG.getConstant(31, MVT::i32));
2481 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2483 DAG.getNode(ISD::SELECT, dl, IntVT,
2484 rhsSelectMask, rhsSignMag2TC, i64rhs);
2488 switch (CC->get()) {
2491 compareOp = ISD::SETEQ; break;
2494 compareOp = ISD::SETGT; break;
2497 compareOp = ISD::SETGE; break;
2500 compareOp = ISD::SETLT; break;
2503 compareOp = ISD::SETLE; break;
2506 compareOp = ISD::SETNE; break;
2508 llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2512 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2513 (ISD::CondCode) compareOp);
2515 if ((CC->get() & 0x8) == 0) {
2516 // Ordered comparison:
2517 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2518 lhs, DAG.getConstantFP(0.0, MVT::f64),
2520 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2521 rhs, DAG.getConstantFP(0.0, MVT::f64),
2523 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2525 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2531 //! Lower ISD::SELECT_CC
2533 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2536 \note Need to revisit this in the future: if the code path through the true
2537 and false value computations is longer than the latency of a branch (6
2538 cycles), then it would be more advantageous to branch and insert a new basic
2539 block and branch on the condition. However, this code does not make that
2540 assumption, given the simplisitc uses so far.
2543 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2544 const TargetLowering &TLI) {
2545 MVT VT = Op.getValueType();
2546 SDValue lhs = Op.getOperand(0);
2547 SDValue rhs = Op.getOperand(1);
2548 SDValue trueval = Op.getOperand(2);
2549 SDValue falseval = Op.getOperand(3);
2550 SDValue condition = Op.getOperand(4);
2551 DebugLoc dl = Op.getDebugLoc();
2553 // NOTE: SELB's arguments: $rA, $rB, $mask
2555 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2556 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2557 // condition was true and 0s where the condition was false. Hence, the
2558 // arguments to SELB get reversed.
2560 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2561 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2562 // with another "cannot select select_cc" assert:
2564 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2565 TLI.getSetCCResultType(Op.getValueType()),
2566 lhs, rhs, condition);
2567 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2570 //! Custom lower ISD::TRUNCATE
2571 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2573 // Type to truncate to
2574 MVT VT = Op.getValueType();
2575 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2576 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2577 DebugLoc dl = Op.getDebugLoc();
2579 // Type to truncate from
2580 SDValue Op0 = Op.getOperand(0);
2581 MVT Op0VT = Op0.getValueType();
2583 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2584 // Create shuffle mask, least significant doubleword of quadword
2585 unsigned maskHigh = 0x08090a0b;
2586 unsigned maskLow = 0x0c0d0e0f;
2587 // Use a shuffle to perform the truncation
2588 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2589 DAG.getConstant(maskHigh, MVT::i32),
2590 DAG.getConstant(maskLow, MVT::i32),
2591 DAG.getConstant(maskHigh, MVT::i32),
2592 DAG.getConstant(maskLow, MVT::i32));
2594 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2595 Op0, Op0, shufMask);
2597 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2600 return SDValue(); // Leave the truncate unmolested
2603 //! Custom (target-specific) lowering entry point
2605 This is where LLVM's DAG selection process calls to do target-specific
2609 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2611 unsigned Opc = (unsigned) Op.getOpcode();
2612 MVT VT = Op.getValueType();
2617 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2618 cerr << "Op.getOpcode() = " << Opc << "\n";
2619 cerr << "*Op.getNode():\n";
2620 Op.getNode()->dump();
2622 llvm_unreachable(0);
2628 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2630 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2631 case ISD::ConstantPool:
2632 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2633 case ISD::GlobalAddress:
2634 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2635 case ISD::JumpTable:
2636 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2637 case ISD::ConstantFP:
2638 return LowerConstantFP(Op, DAG);
2640 // i8, i64 math ops:
2649 return LowerI8Math(Op, DAG, Opc, *this);
2653 case ISD::FP_TO_SINT:
2654 case ISD::FP_TO_UINT:
2655 return LowerFP_TO_INT(Op, DAG, *this);
2657 case ISD::SINT_TO_FP:
2658 case ISD::UINT_TO_FP:
2659 return LowerINT_TO_FP(Op, DAG, *this);
2661 // Vector-related lowering.
2662 case ISD::BUILD_VECTOR:
2663 return LowerBUILD_VECTOR(Op, DAG);
2664 case ISD::SCALAR_TO_VECTOR:
2665 return LowerSCALAR_TO_VECTOR(Op, DAG);
2666 case ISD::VECTOR_SHUFFLE:
2667 return LowerVECTOR_SHUFFLE(Op, DAG);
2668 case ISD::EXTRACT_VECTOR_ELT:
2669 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2670 case ISD::INSERT_VECTOR_ELT:
2671 return LowerINSERT_VECTOR_ELT(Op, DAG);
2673 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2677 return LowerByteImmed(Op, DAG);
2679 // Vector and i8 multiply:
2682 return LowerI8Math(Op, DAG, Opc, *this);
2685 return LowerCTPOP(Op, DAG);
2687 case ISD::SELECT_CC:
2688 return LowerSELECT_CC(Op, DAG, *this);
2691 return LowerSETCC(Op, DAG, *this);
2694 return LowerTRUNCATE(Op, DAG);
2700 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2701 SmallVectorImpl<SDValue>&Results,
2705 unsigned Opc = (unsigned) N->getOpcode();
2706 MVT OpVT = N->getValueType(0);
2710 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2711 cerr << "Op.getOpcode() = " << Opc << "\n";
2712 cerr << "*Op.getNode():\n";
2720 /* Otherwise, return unchanged */
2723 //===----------------------------------------------------------------------===//
2724 // Target Optimization Hooks
2725 //===----------------------------------------------------------------------===//
2728 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2731 TargetMachine &TM = getTargetMachine();
2733 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2734 SelectionDAG &DAG = DCI.DAG;
2735 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2736 MVT NodeVT = N->getValueType(0); // The node's value type
2737 MVT Op0VT = Op0.getValueType(); // The first operand's result
2738 SDValue Result; // Initially, empty result
2739 DebugLoc dl = N->getDebugLoc();
2741 switch (N->getOpcode()) {
2744 SDValue Op1 = N->getOperand(1);
2746 if (Op0.getOpcode() == SPUISD::IndirectAddr
2747 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2748 // Normalize the operands to reduce repeated code
2749 SDValue IndirectArg = Op0, AddArg = Op1;
2751 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2756 if (isa<ConstantSDNode>(AddArg)) {
2757 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2758 SDValue IndOp1 = IndirectArg.getOperand(1);
2760 if (CN0->isNullValue()) {
2761 // (add (SPUindirect <arg>, <arg>), 0) ->
2762 // (SPUindirect <arg>, <arg>)
2764 #if !defined(NDEBUG)
2765 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2767 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2768 << "With: (SPUindirect <arg>, <arg>)\n";
2773 } else if (isa<ConstantSDNode>(IndOp1)) {
2774 // (add (SPUindirect <arg>, <const>), <const>) ->
2775 // (SPUindirect <arg>, <const + const>)
2776 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2777 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2778 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2780 #if !defined(NDEBUG)
2781 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2783 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2784 << "), " << CN0->getSExtValue() << ")\n"
2785 << "With: (SPUindirect <arg>, "
2786 << combinedConst << ")\n";
2790 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2791 IndirectArg, combinedValue);
2797 case ISD::SIGN_EXTEND:
2798 case ISD::ZERO_EXTEND:
2799 case ISD::ANY_EXTEND: {
2800 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2801 // (any_extend (SPUextract_elt0 <arg>)) ->
2802 // (SPUextract_elt0 <arg>)
2803 // Types must match, however...
2804 #if !defined(NDEBUG)
2805 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2806 cerr << "\nReplace: ";
2809 Op0.getNode()->dump(&DAG);
2818 case SPUISD::IndirectAddr: {
2819 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2820 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2821 if (CN != 0 && CN->getZExtValue() == 0) {
2822 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2823 // (SPUaform <addr>, 0)
2825 DEBUG(cerr << "Replace: ");
2826 DEBUG(N->dump(&DAG));
2827 DEBUG(cerr << "\nWith: ");
2828 DEBUG(Op0.getNode()->dump(&DAG));
2829 DEBUG(cerr << "\n");
2833 } else if (Op0.getOpcode() == ISD::ADD) {
2834 SDValue Op1 = N->getOperand(1);
2835 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2836 // (SPUindirect (add <arg>, <arg>), 0) ->
2837 // (SPUindirect <arg>, <arg>)
2838 if (CN1->isNullValue()) {
2840 #if !defined(NDEBUG)
2841 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2843 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2844 << "With: (SPUindirect <arg>, <arg>)\n";
2848 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2849 Op0.getOperand(0), Op0.getOperand(1));
2855 case SPUISD::SHLQUAD_L_BITS:
2856 case SPUISD::SHLQUAD_L_BYTES:
2857 case SPUISD::VEC_SHL:
2858 case SPUISD::VEC_SRL:
2859 case SPUISD::VEC_SRA:
2860 case SPUISD::ROTBYTES_LEFT: {
2861 SDValue Op1 = N->getOperand(1);
2863 // Kill degenerate vector shifts:
2864 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2865 if (CN->isNullValue()) {
2871 case SPUISD::PREFSLOT2VEC: {
2872 switch (Op0.getOpcode()) {
2875 case ISD::ANY_EXTEND:
2876 case ISD::ZERO_EXTEND:
2877 case ISD::SIGN_EXTEND: {
2878 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2880 // but only if the SPUprefslot2vec and <arg> types match.
2881 SDValue Op00 = Op0.getOperand(0);
2882 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2883 SDValue Op000 = Op00.getOperand(0);
2884 if (Op000.getValueType() == NodeVT) {
2890 case SPUISD::VEC2PREFSLOT: {
2891 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2893 Result = Op0.getOperand(0);
2901 // Otherwise, return unchanged.
2903 if (Result.getNode()) {
2904 DEBUG(cerr << "\nReplace.SPU: ");
2905 DEBUG(N->dump(&DAG));
2906 DEBUG(cerr << "\nWith: ");
2907 DEBUG(Result.getNode()->dump(&DAG));
2908 DEBUG(cerr << "\n");
2915 //===----------------------------------------------------------------------===//
2916 // Inline Assembly Support
2917 //===----------------------------------------------------------------------===//
2919 /// getConstraintType - Given a constraint letter, return the type of
2920 /// constraint it is for this target.
2921 SPUTargetLowering::ConstraintType
2922 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2923 if (ConstraintLetter.size() == 1) {
2924 switch (ConstraintLetter[0]) {
2931 return C_RegisterClass;
2934 return TargetLowering::getConstraintType(ConstraintLetter);
2937 std::pair<unsigned, const TargetRegisterClass*>
2938 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2941 if (Constraint.size() == 1) {
2942 // GCC RS6000 Constraint Letters
2943 switch (Constraint[0]) {
2947 return std::make_pair(0U, SPU::R64CRegisterClass);
2948 return std::make_pair(0U, SPU::R32CRegisterClass);
2951 return std::make_pair(0U, SPU::R32FPRegisterClass);
2952 else if (VT == MVT::f64)
2953 return std::make_pair(0U, SPU::R64FPRegisterClass);
2956 return std::make_pair(0U, SPU::GPRCRegisterClass);
2960 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2963 //! Compute used/known bits for a SPU operand
2965 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2969 const SelectionDAG &DAG,
2970 unsigned Depth ) const {
2972 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
2974 switch (Op.getOpcode()) {
2976 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2982 case SPUISD::PREFSLOT2VEC:
2983 case SPUISD::LDRESULT:
2984 case SPUISD::VEC2PREFSLOT:
2985 case SPUISD::SHLQUAD_L_BITS:
2986 case SPUISD::SHLQUAD_L_BYTES:
2987 case SPUISD::VEC_SHL:
2988 case SPUISD::VEC_SRL:
2989 case SPUISD::VEC_SRA:
2990 case SPUISD::VEC_ROTL:
2991 case SPUISD::VEC_ROTR:
2992 case SPUISD::ROTBYTES_LEFT:
2993 case SPUISD::SELECT_MASK:
3000 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3001 unsigned Depth) const {
3002 switch (Op.getOpcode()) {
3007 MVT VT = Op.getValueType();
3009 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3012 return VT.getSizeInBits();
3017 // LowerAsmOperandForConstraint
3019 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3020 char ConstraintLetter,
3022 std::vector<SDValue> &Ops,
3023 SelectionDAG &DAG) const {
3024 // Default, for the time being, to the base class handler
3025 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3029 /// isLegalAddressImmediate - Return true if the integer value can be used
3030 /// as the offset of the target addressing mode.
3031 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3032 const Type *Ty) const {
3033 // SPU's addresses are 256K:
3034 return (V > -(1 << 18) && V < (1 << 18) - 1);
3037 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3042 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3043 // The SPU target isn't yet aware of offsets.