//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;
  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const EVT   valtype;
    const int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { EVT::i1,   3 },
    { EVT::i8,   3 },
    { EVT::i16,  2 },
    { EVT::i32,  0 },
    { EVT::f32,  0 },
    { EVT::i64,  0 },
    { EVT::f64,  0 },
    { EVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
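  // The "preferred slot" above is the SPU ABI's term for where a scalar lives
  // inside a 128-bit register: word element 0 (bytes 0-3), with sub-word
  // values right-justified within that word. Hence the byte offsets: 3 for
  // i8, 2 for i16, and 0 for word-sized and larger types.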
  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getEVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }
  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  static SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    // (args: Chain, RetTy, RetSExt, RetZExt, isVarArg, isInreg, NumFixedArgs,
    //  CallConv, isTailCall, isReturnValueUsed, Callee, Args, DAG, DebugLoc)
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForEVT();
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            0, CallingConv::C, false,
                            /*isReturnValueUsed=*/true,
                            Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
  // Set up the SPU's register classes:
  addRegisterClass(EVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(EVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(EVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(EVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(EVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(EVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(EVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  EVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, EVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, EVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, EVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, EVT::f64, Expand);

  setTruncStoreAction(EVT::i128, EVT::i64, Expand);
  setTruncStoreAction(EVT::i128, EVT::i32, Expand);
  setTruncStoreAction(EVT::i128, EVT::i16, Expand);
  setTruncStoreAction(EVT::i128, EVT::i8,  Expand);

  setTruncStoreAction(EVT::f64, EVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, EVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, EVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) EVT::i8; sctype < (unsigned) EVT::i128;
       ++sctype) {
    EVT::SimpleValueType VT = (EVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) EVT::i8; --stype) {
      EVT::SimpleValueType StoreVT = (EVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) EVT::f32; sctype < (unsigned) EVT::f64;
       ++sctype) {
    EVT::SimpleValueType VT = (EVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) EVT::f32; --stype) {
      EVT::SimpleValueType StoreVT = (EVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
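  // The custom LOAD/STORE actions above all funnel into LowerLOAD and
  // LowerSTORE below: the SPU local store only supports 16-byte,
  // 16-byte-aligned accesses, so scalars are rotated out of (loads) or
  // shuffled into (stores) their containing quadword.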
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, EVT::Other, Expand);
  setOperationAction(ISD::BR_CC, EVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, EVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, EVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, EVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, EVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, EVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, EVT::Other, Expand);
  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    EVT::i8,   Expand);
  setOperationAction(ISD::UREM,    EVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    EVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    EVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, EVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, EVT::i8,   Expand);
  setOperationAction(ISD::SREM,    EVT::i16,  Expand);
  setOperationAction(ISD::UREM,    EVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    EVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    EVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, EVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, EVT::i16,  Expand);
  setOperationAction(ISD::SREM,    EVT::i32,  Expand);
  setOperationAction(ISD::UREM,    EVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    EVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    EVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, EVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, EVT::i32,  Expand);
  setOperationAction(ISD::SREM,    EVT::i64,  Expand);
  setOperationAction(ISD::UREM,    EVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    EVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    EVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, EVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, EVT::i64,  Expand);
  setOperationAction(ISD::SREM,    EVT::i128, Expand);
  setOperationAction(ISD::UREM,    EVT::i128, Expand);
  setOperationAction(ISD::SDIV,    EVT::i128, Expand);
  setOperationAction(ISD::UDIV,    EVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, EVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, EVT::i128, Expand);
  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, EVT::f64, Expand);
  setOperationAction(ISD::FCOS, EVT::f64, Expand);
  setOperationAction(ISD::FREM, EVT::f64, Expand);
  setOperationAction(ISD::FSIN, EVT::f32, Expand);
  setOperationAction(ISD::FCOS, EVT::f32, Expand);
  setOperationAction(ISD::FREM, EVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, EVT::f64, Expand);
  setOperationAction(ISD::FSQRT, EVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, EVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, EVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported
  //        in the ISD layer.
  setOperationAction(ISD::ROTR, EVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, EVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, EVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, EVT::i32, Legal);
  setOperationAction(ISD::ROTL, EVT::i16, Legal);
  setOperationAction(ISD::ROTL, EVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, EVT::i8, Custom);
  setOperationAction(ISD::SRL, EVT::i8, Custom);
  setOperationAction(ISD::SRA, EVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, EVT::i64, Legal);
  setOperationAction(ISD::SRL, EVT::i64, Legal);
  setOperationAction(ISD::SRA, EVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal and
  // are matched during instruction selection:
  setOperationAction(ISD::MUL, EVT::i8,  Custom);
  setOperationAction(ISD::MUL, EVT::i32, Legal);
  setOperationAction(ISD::MUL, EVT::i64, Legal);
  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, EVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, EVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     EVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     EVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, EVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, EVT::i16, Expand);
  setOperationAction(ISD::MULHU,     EVT::i16, Expand);
  setOperationAction(ISD::MULHS,     EVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, EVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, EVT::i32, Expand);
  setOperationAction(ISD::MULHU,     EVT::i32, Expand);
  setOperationAction(ISD::MULHS,     EVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, EVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, EVT::i64, Expand);
  setOperationAction(ISD::MULHU,     EVT::i64, Expand);
  setOperationAction(ISD::MULHS,     EVT::i64, Expand);
  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, EVT::i8,  Custom);
  setOperationAction(ISD::ADD, EVT::i64, Legal);
  setOperationAction(ISD::SUB, EVT::i8,  Custom);
  setOperationAction(ISD::SUB, EVT::i64, Legal);

  // SPU does not have BSWAP. It does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, EVT::i32, Expand);
  setOperationAction(ISD::BSWAP, EVT::i64, Expand);

  setOperationAction(ISD::CTPOP, EVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, EVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, EVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, EVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, EVT::i128, Expand);
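  // (CTPOP is custom lowered because the SPU's cntb instruction, selected
  // from SPUISD::CNTB, counts ones per byte; the per-byte counts must then
  // be combined into the requested scalar width.)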
  setOperationAction(ISD::CTTZ, EVT::i8,   Expand);
  setOperationAction(ISD::CTTZ, EVT::i16,  Expand);
  setOperationAction(ISD::CTTZ, EVT::i32,  Expand);
  setOperationAction(ISD::CTTZ, EVT::i64,  Expand);
  setOperationAction(ISD::CTTZ, EVT::i128, Expand);

  setOperationAction(ISD::CTLZ, EVT::i8,   Promote);
  setOperationAction(ISD::CTLZ, EVT::i16,  Promote);
  setOperationAction(ISD::CTLZ, EVT::i32,  Legal);
  setOperationAction(ISD::CTLZ, EVT::i64,  Expand);
  setOperationAction(ISD::CTLZ, EVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, EVT::i8,  Legal);
  setOperationAction(ISD::SELECT, EVT::i16, Legal);
  setOperationAction(ISD::SELECT, EVT::i32, Legal);
  setOperationAction(ISD::SELECT, EVT::i64, Legal);

  setOperationAction(ISD::SETCC, EVT::i8,  Legal);
  setOperationAction(ISD::SETCC, EVT::i16, Legal);
  setOperationAction(ISD::SETCC, EVT::i32, Legal);
  setOperationAction(ISD::SETCC, EVT::i64, Legal);
  setOperationAction(ISD::SETCC, EVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, EVT::i64, Custom);
  setOperationAction(ISD::FP_TO_SINT, EVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, EVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, EVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, EVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, EVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, EVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, EVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, EVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, EVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, EVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, EVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
  setOperationAction(ISD::SINT_TO_FP, EVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, EVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, EVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, EVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, EVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, EVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, EVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, EVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, EVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, EVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, EVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, EVT::f64, Legal);
  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, EVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, EVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     EVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) EVT::i8; sctype < (unsigned) EVT::f128;
       ++sctype) {
    EVT::SimpleValueType VT = (EVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, EVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              EVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             EVT::Other, Expand);
  setOperationAction(ISD::VAEND,              EVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          EVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       EVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, EVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, EVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, EVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, EVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, EVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, EVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(EVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(EVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(EVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(EVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(EVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(EVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(EVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)EVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)EVT::LAST_VECTOR_VALUETYPE; ++i) {
    EVT::SimpleValueType VT = (EVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is also legal for the supported vector types.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, EVT::v16i8, Custom);
  setOperationAction(ISD::OR,  EVT::v16i8, Custom);
  setOperationAction(ISD::XOR, EVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, EVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, EVT::v4f32, Legal);

  setShiftAmountType(EVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
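  // (SPU comparison instructions produce all-zeros or all-ones results,
  // which is what ZeroOrNegativeOneBooleanContent expresses.)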
  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;  // 2^3 = 8-byte alignment
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

EVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == EVT::i8 || VT == EVT::i16 || VT == EVT::i32) ?
          VT.getSimpleVT() : EVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for EVT::f32 extending load to EVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
 */
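// A worked example of the rotation arithmetic used below: for an i32 load at
// offset 0x6 within its quadword, prefslot_byte is 0 and rotamt =
// (0x6 & 0xf) - 0 = 6; rotating the quadword left by 6 bytes brings bytes
// 6..9 down to bytes 0..3, the preferred slot of a 32-bit value. For an i16
// at the same offset, prefslot_byte is 2 and rotamt = 4, which lands the
// halfword at bytes 2..3.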
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, EVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, EVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(EVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, EVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, EVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
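// The insertion mask comes from SPUISD::SHUFFLE_MASK (eventually selected to
// one of the C*D "generate controls for insertion" instructions), which
// builds a shufb control word routing the new scalar's bytes into the target
// slot and the original quadword's bytes everywhere else.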
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = EVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(EVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     EVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
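// The same addressing pattern recurs for jump tables and global addresses
// below: with small memory, the address fits an A-form (absolute) operand;
// in "large memory" mode it is materialized as an (SPUhi, SPUlo) pair
// combined through SPUISD::IndirectAddr.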
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == EVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert(FP != 0 &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, EVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v2f64, Tvec));
  }

  return SDValue();
}
SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        unsigned CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFormalArguments Unhandled argument type: "
            << ObjectVT.getEVTString();
        llvm_report_error(Msg.str());
      }
      case EVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case EVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case EVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case EVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case EVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case EVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case EVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case EVT::v2f64:
      case EVT::v4f32:
      case EVT::v2i64:
      case EVT::v4i32:
      case EVT::v8i16:
      case EVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], EVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, EVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, EVT::i32).getNode();
}
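// isLSAAddress is used by LowerCall below to recognize absolute callee
// addresses; e.g., 0x1fffc is word-aligned and sign-extends from bit 17,
// so it is accepted and encoded as the immediate 0x1fffc >> 2 = 0x7fff.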
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             unsigned CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, EVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Outs[i].Val;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case EVT::i8:
    case EVT::i16:
    case EVT::i32:
    case EVT::i64:
    case EVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case EVT::f32:
    case EVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case EVT::v2i64:
    case EVT::v2f64:
    case EVT::v4f32:
    case EVT::v4i32:
    case EVT::v8i16:
    case EVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, EVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(EVT::Other, EVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT()) {
  default: llvm_unreachable("Unexpected ret value!");
  case EVT::Other: break;
  case EVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == EVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 EVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case EVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i64,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case EVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, EVT::i128,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case EVT::f32:
  case EVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case EVT::v2f64:
  case EVT::v2i64:
  case EVT::v4f32:
  case EVT::v4i32:
  case EVT::v8i16:
  case EVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               unsigned CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, EVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, EVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == EVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
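// Note the v2i64 handling in these helpers: a 64-bit splat can only use an
// immediate form when both 32-bit halves are identical, because the
// underlying immediate-load instructions replicate a 32-bit pattern across
// the register.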
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == EVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == EVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == EVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == EVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == EVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == EVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
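// The check above reflects ilhu ("immediate load halfword upper"), which
// sets the upper 16 bits of each word element and zeroes the lower 16; only
// constants of the form 0xXXXX0000 qualify, and the returned immediate is
// the constant shifted right by 16.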
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), EVT::i32);
  }

  return SDValue();
}
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), EVT::i64);
  }

  return SDValue();
}
//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT()) {
  default: {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
        << VT.getEVTString();
    llvm_report_error(Msg.str());
    /*NOTREACHED*/
  }
  case EVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, EVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32, T,T,T,T));
  }
  case EVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, EVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v2i64, T, T));
  }
  case EVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, EVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case EVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case EVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case EVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case EVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
1643 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1645 uint32_t upper = uint32_t(SplatVal >> 32);
1646 uint32_t lower = uint32_t(SplatVal);
1648 if (upper == lower) {
1649 // Magic constant that can be matched by IL, ILA, et. al.
1650 SDValue Val = DAG.getTargetConstant(upper, EVT::i32);
1651 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1652 DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
1653 Val, Val, Val, Val));
1655 bool upper_special, lower_special;
1657 // NOTE: This code creates common-case shuffle masks that can be easily
1658 // detected as common expressions. It is not attempting to create highly
1659 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1661 // Detect if the upper or lower half is a special shuffle mask pattern:
1662 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1663 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1665 // Both upper and lower are special, lower to a constant pool load:
1666 if (lower_special && upper_special) {
1667 SDValue SplatValCN = DAG.getConstant(SplatVal, EVT::i64);
1668 return DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v2i64,
1669 SplatValCN, SplatValCN);
1674 SmallVector<SDValue, 16> ShufBytes;
1677 // Create lower vector if not a special pattern
1678 if (!lower_special) {
1679 SDValue LO32C = DAG.getConstant(lower, EVT::i32);
1680 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1681 DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
1682 LO32C, LO32C, LO32C, LO32C));
1685 // Create upper vector if not a special pattern
1686 if (!upper_special) {
1687 SDValue HI32C = DAG.getConstant(upper, EVT::i32);
1688 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1689 DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
1690 HI32C, HI32C, HI32C, HI32C));
1693 // If either upper or lower are special, then the two input operands are
1694 // the same (basically, one of them is a "don't care")
  for (int i = 0; i < 4; ++i) {
    unsigned val = 0;
    for (int j = 0; j < 4; ++j) {
      bool process_upper, process_lower;
      val <<= 8;
      process_upper = (upper_special && (i & 1) == 0);
      process_lower = (lower_special && (i & 1) == 1);

      if (process_upper || process_lower) {
        if ((process_upper && upper == 0)
            || (process_lower && lower == 0))
          val |= 0x80;
        else if ((process_upper && upper == 0xffffffff)
            || (process_lower && lower == 0xffffffff))
          val |= 0xc0;
        else if ((process_upper && upper == 0x80000000)
            || (process_lower && lower == 0x80000000))
          val |= (j == 0 ? 0xe0 : 0x80);
      } else
        val |= i * 4 + j + ((i & 1) * 16);
    }

    ShufBytes.push_back(DAG.getConstant(val, EVT::i32));
  }
1726 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1727 DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
                                 &ShufBytes[0], ShufBytes.size()));
}
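// Worked example (illustrative, not from the original source): for
// SplatVal == 0x0000000000000001, upper == 0 is "special" while lower == 1
// is not, so only LO32 is materialized (HI32 aliases it). The control words
// for the even (upper) slots are built from 0x80 selector bytes, which make
// SHUFB produce 0x00 directly, while the odd (lower) slots select the
// matching word of the second operand, i.e. the replicated value 1.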
1732 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1733 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1734 /// permutation vector, V3, is monotonically increasing with one "exception"
1735 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1736 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1737 /// In either case, the net result is going to eventually invoke SHUFB to
1738 /// permute/shuffle the bytes from V1 and V2.
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as the Cell's <i>shufb</i> instruction.
1745 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1746 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1747 SDValue V1 = Op.getOperand(0);
1748 SDValue V2 = Op.getOperand(1);
1749 DebugLoc dl = Op.getDebugLoc();
1751 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1753 // If we have a single element being moved from V1 to V2, this can be handled
1754 // using the C*[DX] compute mask instructions, but the vector elements have
1755 // to be monotonically increasing with one exception element.
1756 EVT VecVT = V1.getValueType();
1757 EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  int V0Elt = 0;

  bool monotonic = true;
  bool rotate = true;
  if (EltVT == EVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == EVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == EVT::i32 || EltVT == EVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == EVT::i64 || EltVT == EVT::f64) {
    V2EltIdx0 = 2;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        if (1 >= (++EltsFromV2)) {
          V2Elt = (V2EltIdx0 - SrcElt) << 2;
        }
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
          if (SrcElt == 0)
            V0Elt = i;
        } else {
          rotate = false;
        }
      } else if (PrevElt == 0) {
        // First time through, need to keep track of the previous element
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation; it takes elements from vector 2
        rotate = false;
      }
    }
  }
1817 if (EltsFromV2 == 1 && monotonic) {
1818 // Compute mask and shuffle
1819 MachineFunction &MF = DAG.getMachineFunction();
1820 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1821 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1822 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1823 // Initialize temporary register to 0
1824 SDValue InitTempReg =
1825 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1826 // Copy register's contents as index in SHUFFLE_MASK:
1827 SDValue ShufMaskOp =
1828 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, EVT::v4i32,
1829 DAG.getTargetConstant(V2Elt, EVT::i32),
1830 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1831 // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
1834 } else if (rotate) {
1835 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1837 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1838 V1, DAG.getConstant(rotamt, EVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // byte units used by the SHUFB instruction:
1842 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1844 SmallVector<SDValue, 16> ResultMask;
1845 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1846 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1848 for (unsigned j = 0; j < BytesPerElement; ++j)
1849 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,EVT::i8));
1852 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v16i8,
1853 &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}
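// A minimal sketch (hypothetical helper, not part of the lowering path
// above): the monotonic-with-one-exception test in isolation, separated from
// the rotation check. It returns true exactly when every non-undef mask
// element either keeps V1's element order or is one (and only one) element
// taken from V2.
static bool isMonotonicWithSingleV2Element(const int *Mask, unsigned NumElts) {
  unsigned EltsFromV2 = 0;
  unsigned CurrElt = 0;
  for (unsigned i = 0; i != NumElts; ++i) {
    int SrcElt = Mask[i];
    if (SrcElt < 0)
      continue;                          // undef elements match anything
    if (unsigned(SrcElt) >= NumElts)
      ++EltsFromV2;                      // element is sourced from V2
    else if (CurrElt != unsigned(SrcElt))
      return false;                      // V1 elements must stay in order
    ++CurrElt;
  }
  return EltsFromV2 == 1;
}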
1858 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1859 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1860 DebugLoc dl = Op.getDebugLoc();
1862 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1863 // For a constant, build the appropriate constant vector, which will
1864 // eventually simplify to a vector register load.
1866 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    EVT VT;
    size_t n_copies;

    // Create a constant vector:
1872 switch (Op.getValueType().getSimpleVT()) {
1873 default: llvm_unreachable("Unexpected constant value type in "
1874 "LowerSCALAR_TO_VECTOR");
1875 case EVT::v16i8: n_copies = 16; VT = EVT::i8; break;
1876 case EVT::v8i16: n_copies = 8; VT = EVT::i16; break;
1877 case EVT::v4i32: n_copies = 4; VT = EVT::i32; break;
1878 case EVT::v4f32: n_copies = 4; VT = EVT::f32; break;
1879 case EVT::v2i64: n_copies = 2; VT = EVT::i64; break;
1880 case EVT::v2f64: n_copies = 2; VT = EVT::f64; break;
1883 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1884 for (size_t j = 0; j < n_copies; ++j)
1885 ConstVecValues.push_back(CValue);
1887 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1888 &ConstVecValues[0], ConstVecValues.size());
  }

  // Otherwise, copy the value from one register to another:
1891 switch (Op0.getValueType().getSimpleVT()) {
  default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
  case EVT::i8:
  case EVT::i16:
  case EVT::i32:
  case EVT::i64:
  case EVT::f32:
  case EVT::f64:
    return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
  }
}
1906 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1907 EVT VT = Op.getValueType();
1908 SDValue N = Op.getOperand(0);
1909 SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;
1913 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1914 // Constant argument:
1915 int EltNo = (int) C->getZExtValue();
1918 if (VT == EVT::i8 && EltNo >= 16)
1919 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1920 else if (VT == EVT::i16 && EltNo >= 8)
1921 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == EVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == EVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1927 if (EltNo == 0 && (VT == EVT::i32 || VT == EVT::i64)) {
1928 // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }
1932 // Need to generate shuffle mask and extract:
1933 int prefslot_begin = -1, prefslot_end = -1;
1934 int elt_byte = EltNo * VT.getSizeInBits() / 8;
    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case EVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case EVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case EVT::i32:
    case EVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case EVT::i64:
    case EVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }
1959 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1960 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1962 unsigned int ShufBytes[16];
1963 for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // rest:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
            ? 0x80
            : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }
1978 SDValue ShufMask[4];
1979 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1980 unsigned bidx = i * 4;
1981 unsigned int bits = ((ShufBytes[bidx] << 24) |
1982 (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, EVT::i32);
    }
1988 SDValue ShufMaskVec =
1989 DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
1990 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1992 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1993 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
1996 // Variable index: Rotate the requested element into slot 0, then replicate
1997 // slot 0 across the vector
1998 EVT VecVT = N.getValueType();
1999 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
                        " vector type!");
    }
2004 // Make life easier by making sure the index is zero-extended to i32
2005 if (Elt.getValueType() != EVT::i32)
2006 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, EVT::i32, Elt);
2008 // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
2015 // Scale the shift factor:
2016 Elt = DAG.getNode(ISD::SHL, dl, EVT::i32, Elt,
                        DAG.getConstant(scaleShift, EVT::i32));
    }
2020 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set):
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
                        " type");
    case EVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, EVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case EVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, EVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case EVT::i32:
    case EVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, EVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case EVT::i64:
    case EVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, EVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, EVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }
2060 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2061 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
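// Worked example (illustrative): extracting a variable element from v8i16
// gives scaleFactor == 16/8 == 2 and scaleShift == 1, so the quadword is
// shifted left by (Elt << 1) bytes, parking the requested halfword at byte 0;
// the 0x00010001 replicate factor then copies bytes 0-1 across all slots
// before VEC2PREFSLOT reads the preferred slot.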
2068 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2069 SDValue VecOp = Op.getOperand(0);
2070 SDValue ValOp = Op.getOperand(1);
2071 SDValue IdxOp = Op.getOperand(2);
2072 DebugLoc dl = Op.getDebugLoc();
2073 EVT VT = Op.getValueType();
2075 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2076 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2078 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2079 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2080 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2081 DAG.getRegister(SPU::R1, PtrVT),
2082 DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, dl, EVT::v4i32, ShufMask));

  return result;
}
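// Illustrative note (an assumption about the C*D instruction forms): CBD,
// CHD and CWD compute their insertion control word from the low nibble of
// (rA + imm), so any 16-byte-aligned base register would do; $sp is chosen
// above only because it is guaranteed to be 16-byte aligned and always
// available.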
2094 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
2097 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2098 DebugLoc dl = Op.getDebugLoc();
2099 EVT ShiftVT = TLI.getShiftAmountTy();
  assert(Op.getValueType() == EVT::i8);

  switch (Opc) {
  default:
    llvm_unreachable("Unhandled i8 math operator");

  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result down to 8 bits:
2110 SDValue N1 = Op.getOperand(1);
2111 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
2112 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N1);
2113 return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
2114 DAG.getNode(Opc, dl, EVT::i16, N0, N1));
  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result down to 8 bits:
2121 SDValue N1 = Op.getOperand(1);
2122 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
2123 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N1);
2124 return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
2125 DAG.getNode(Opc, dl, EVT::i16, N0, N1));
  }

  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
2130 EVT N1VT = N1.getValueType();
2132 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, EVT::i16, N0);
2133 if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
2137 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    // Replicate lower 8-bits into upper 8:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, EVT::i16, N0,
2143 DAG.getNode(ISD::SHL, dl, EVT::i16,
2144 N0, DAG.getConstant(8, EVT::i32)));
2146 // Truncate back down to i8
2147 return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
2148 DAG.getNode(Opc, dl, EVT::i16, ExpandArg, N1));
  }

  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
2153 EVT N1VT = N1.getValueType();
2155 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, EVT::i16, N0);
2156 if (!N1VT.bitsEq(ShiftVT)) {
2157 unsigned N1Opc = ISD::ZERO_EXTEND;
2159 if (N1.getValueType().bitsGT(ShiftVT))
2160 N1Opc = ISD::TRUNCATE;
2162 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2165 return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
2166 DAG.getNode(Opc, dl, EVT::i16, N0, N1));
  }

  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
2170 EVT N1VT = N1.getValueType();
2172 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
2173 if (!N1VT.bitsEq(ShiftVT)) {
2174 unsigned N1Opc = ISD::SIGN_EXTEND;
2176 if (N1VT.bitsGT(ShiftVT))
2177 N1Opc = ISD::TRUNCATE;
2178 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
2182 DAG.getNode(Opc, dl, EVT::i16, N0, N1));
  }

  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);
2187 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N0);
2188 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, EVT::i16, N1);
2189 return DAG.getNode(ISD::TRUNCATE, dl, EVT::i8,
                       DAG.getNode(Opc, dl, EVT::i16, N0, N1));
  }
  }

  return SDValue();
}
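// Worked example (illustrative): for ROTL/ROTR, ExpandArg duplicates the i8
// operand into both halves of an i16 (0xAB becomes 0xABAB); rotating the
// duplicated value by any amount in [0, 8) leaves the correctly rotated
// 8-bit result in the low byte, which the final TRUNCATE extracts.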
2198 //! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
2203 EVT VT = Op.getValueType();
2204 DebugLoc dl = Op.getDebugLoc();
2206 ConstVec = Op.getOperand(0);
2207 Arg = Op.getOperand(1);
2208 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2209 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
2212 ConstVec = Op.getOperand(1);
2213 Arg = Op.getOperand(0);
2214 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }
2220 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2221 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2222 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2224 APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
2227 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2229 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2230 HasAnyUndefs, minSplatBits)
2231 && minSplatBits <= SplatBitSize) {
2232 uint64_t SplatBits = APSplatBits.getZExtValue();
2233 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, EVT::i8);
2235 SmallVector<SDValue, 16> tcVec;
2236 tcVec.assign(16, tc);
2237 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
    }
  }
  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
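// Illustrative example: (or (v16i8 x), <splat of 0x0f>) leaves this function
// as the original OR, but with the splat rebuilt as a fresh 16 x i8
// build_vector of the target constant 0x0f, which instruction selection can
// then match to the ORBI (or byte immediate) form; AND and XOR map to
// ANDBI/XORBI the same way.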
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2254 EVT VT = Op.getValueType();
2255 EVT vecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2256 DebugLoc dl = Op.getDebugLoc();
2258 switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case EVT::i8: {
2262 SDValue N = Op.getOperand(0);
2263 SDValue Elt0 = DAG.getConstant(0, EVT::i32);
2265 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2266 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EVT::i8, CNTB, Elt0);
  }

  case EVT::i16: {
2272 MachineFunction &MF = DAG.getMachineFunction();
2273 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2275 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2277 SDValue N = Op.getOperand(0);
2278 SDValue Elt0 = DAG.getConstant(0, EVT::i16);
2279 SDValue Mask0 = DAG.getConstant(0x0f, EVT::i16);
2280 SDValue Shift1 = DAG.getConstant(8, EVT::i32);
2282 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2283 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2285 // CNTB_result becomes the chain to which all of the virtual registers
2286 // CNTB_reg, SUM1_reg become associated:
2287 SDValue CNTB_result =
2288 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EVT::i16, CNTB, Elt0);
2290 SDValue CNTB_rescopy =
2291 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2293 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, EVT::i16);
2295 return DAG.getNode(ISD::AND, dl, EVT::i16,
2296 DAG.getNode(ISD::ADD, dl, EVT::i16,
                                   DAG.getNode(ISD::SRL, dl, EVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case EVT::i32: {
2304 MachineFunction &MF = DAG.getMachineFunction();
2305 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2307 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2308 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2310 SDValue N = Op.getOperand(0);
2311 SDValue Elt0 = DAG.getConstant(0, EVT::i32);
2312 SDValue Mask0 = DAG.getConstant(0xff, EVT::i32);
2313 SDValue Shift1 = DAG.getConstant(16, EVT::i32);
2314 SDValue Shift2 = DAG.getConstant(8, EVT::i32);
2316 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2317 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2319 // CNTB_result becomes the chain to which all of the virtual registers
2320 // CNTB_reg, SUM1_reg become associated:
2321 SDValue CNTB_result =
2322 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EVT::i32, CNTB, Elt0);
2324 SDValue CNTB_rescopy =
2325 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, EVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, EVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, EVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, EVT::i32));
2336 SDValue Sum1_rescopy =
2337 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, EVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, EVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, EVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, EVT::i32));
    return DAG.getNode(ISD::AND, dl, EVT::i32, Sum2, Mask0);
  }

  case EVT::i64:
    break;
  }

  return SDValue();
}
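// A minimal scalar model (hypothetical helper, not part of the lowering) of
// the i32 shift/add/mask sequence above: given a 32-bit word whose four
// bytes each hold a per-byte population count (what CNTB leaves behind), it
// folds them into a single total.
static unsigned sumPerBytePopcounts(unsigned cntb) {
  unsigned sum1 = (cntb >> 16) + cntb;  // fold the upper halfword onto the lower
  unsigned sum2 = (sum1 >> 8) + sum1;   // fold the remaining upper byte down
  return sum2 & 0xff;                   // low byte now holds b0 + b1 + b2 + b3
}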
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
  All conversions to i64 are expanded to a libcall.
*/
2362 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2363 SPUTargetLowering &TLI) {
2364 EVT OpVT = Op.getValueType();
2365 SDValue Op0 = Op.getOperand(0);
2366 EVT Op0VT = Op0.getValueType();
2368 if ((OpVT == EVT::i32 && Op0VT == EVT::f64)
2369 || OpVT == EVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
2373 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2374 : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
  All conversions from i64 are expanded to a libcall.
*/
2388 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2389 SPUTargetLowering &TLI) {
2390 EVT OpVT = Op.getValueType();
2391 SDValue Op0 = Op.getOperand(0);
2392 EVT Op0VT = Op0.getValueType();
2394 if ((OpVT == EVT::f64 && Op0VT == EVT::i32)
2395 || Op0VT == EVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
2399 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2400 : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SETCC
/*!
  This handles EVT::f64 (double floating point) condition lowering.
*/
2413 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2414 const TargetLowering &TLI) {
2415 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2416 DebugLoc dl = Op.getDebugLoc();
2417 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2419 SDValue lhs = Op.getOperand(0);
2420 SDValue rhs = Op.getOperand(1);
2421 EVT lhsVT = lhs.getValueType();
  assert(lhsVT == EVT::f64 && "LowerSETCC: type other than EVT::f64\n");
2424 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2425 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2426 EVT IntVT(EVT::i64);
2428 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2429 // selected to a NOP:
2430 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, EVT::i32,
2433 DAG.getNode(ISD::SRL, dl, IntVT,
2434 i64lhs, DAG.getConstant(32, EVT::i32)));
2435 SDValue lhsHi32abs =
2436 DAG.getNode(ISD::AND, dl, EVT::i32,
2437 lhsHi32, DAG.getConstant(0x7fffffff, EVT::i32));
  SDValue lhsLo32 =
    DAG.getNode(ISD::TRUNCATE, dl, EVT::i32, i64lhs);
2441 // SETO and SETUO only use the lhs operand:
2442 if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO's result:
2445 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2446 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2447 DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
2451 } else if (CC->get() == ISD::SETUO) {
2452 // Evaluates to true if Op0 is [SQ]NaN
2453 return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, EVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, EVT::i32),
                                    ISD::SETGT));
  }
2464 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, EVT::i32,
2467 DAG.getNode(ISD::SRL, dl, IntVT,
2468 i64rhs, DAG.getConstant(32, EVT::i32)));
2470 // If a value is negative, subtract from the sign magnitude constant:
2471 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2473 // Convert the sign-magnitude representation into 2's complement:
2474 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2475 lhsHi32, DAG.getConstant(31, EVT::i32));
2476 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
2479 lhsSelectMask, lhsSignMag2TC, i64lhs);
2481 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2482 rhsHi32, DAG.getConstant(31, EVT::i32));
2483 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
2486 rhsSelectMask, rhsSignMag2TC, i64rhs);
  unsigned compareOp = 0;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETONE:
  case ISD::SETUNE:
    compareOp = ISD::SETNE; break;
  default:
    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
  }
  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2515 (ISD::CondCode) compareOp);
2517 if ((CC->get() & 0x8) == 0) {
2518 // Ordered comparison:
2519 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, EVT::f64),
                                  ISD::SETO);
2522 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, EVT::f64),
                                  ISD::SETO);
2525 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}
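// A minimal scalar model (hypothetical helper) of the sign-magnitude to
// two's complement conversion used above: it maps the IEEE-754 bit pattern
// of an f64 onto a 64-bit integer whose ordering matches the floating-point
// ordering, so ordinary integer setcc nodes can do the comparison.
static int64_t f64BitsToOrderedInt(uint64_t bits) {
  if (bits & 0x8000000000000000ULL)         // negative: mirror around zero
    return (int64_t) (0x8000000000000000ULL - bits);
  return (int64_t) bits;                    // positive: pattern already ordered
}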
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
*/
2545 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2546 const TargetLowering &TLI) {
2547 EVT VT = Op.getValueType();
2548 SDValue lhs = Op.getOperand(0);
2549 SDValue rhs = Op.getOperand(1);
2550 SDValue trueval = Op.getOperand(2);
2551 SDValue falseval = Op.getOperand(3);
2552 SDValue condition = Op.getOperand(4);
2553 DebugLoc dl = Op.getDebugLoc();
2555 // NOTE: SELB's arguments: $rA, $rB, $mask
2557 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2558 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2559 // condition was true and 0s where the condition was false. Hence, the
2560 // arguments to SELB get reversed.
2562 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2563 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2564 // with another "cannot select select_cc" assert:
2566 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2567 TLI.getSetCCResultType(Op.getValueType()),
2568 lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}
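// A minimal bit-level model (hypothetical helper) of SELB, matching the
// operand-order comment above: bits come from the second operand where the
// mask is 1 and from the first operand where it is 0, which is why falseval
// and trueval are passed to SELB in reversed order.
static uint32_t selbModel(uint32_t rA, uint32_t rB, uint32_t mask) {
  return (rA & ~mask) | (rB & mask);
}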
2572 //! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
2575 // Type to truncate to
2576 EVT VT = Op.getValueType();
2577 EVT::SimpleValueType simpleVT = VT.getSimpleVT();
2578 EVT VecVT = EVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2579 DebugLoc dl = Op.getDebugLoc();
2581 // Type to truncate from
2582 SDValue Op0 = Op.getOperand(0);
2583 EVT Op0VT = Op0.getValueType();
2585 if (Op0VT.getSimpleVT() == EVT::i128 && simpleVT == EVT::i64) {
2586 // Create shuffle mask, least significant doubleword of quadword
2587 unsigned maskHigh = 0x08090a0b;
2588 unsigned maskLow = 0x0c0d0e0f;
2589 // Use a shuffle to perform the truncation
2590 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::v4i32,
2591 DAG.getConstant(maskHigh, EVT::i32),
2592 DAG.getConstant(maskLow, EVT::i32),
2593 DAG.getConstant(maskHigh, EVT::i32),
2594 DAG.getConstant(maskLow, EVT::i32));
2596 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2597 Op0, Op0, shufMask);
2599 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue(); // Leave the truncate unmolested
}
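// Worked example (illustrative): the mask words 0x08090a0b and 0x0c0d0e0f
// expand to the byte selectors 08 09 0a 0b 0c 0d 0e 0f repeated twice, so
// SHUFB copies the least significant doubleword of the big-endian i128
// source into both halves of the result; mask byte i is simply 8 + (i & 7).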
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
*/
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
2613 unsigned Opc = (unsigned) Op.getOpcode();
  EVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2620 cerr << "Op.getOpcode() = " << Opc << "\n";
2621 cerr << "*Op.getNode():\n";
2622 Op.getNode()->dump();
    llvm_unreachable(0);
  }

  case ISD::LOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2633 case ISD::ConstantPool:
2634 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2635 case ISD::GlobalAddress:
2636 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2637 case ISD::JumpTable:
2638 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2639 case ISD::ConstantFP:
2640 return LowerConstantFP(Op, DAG);
  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA:
    return LowerI8Math(Op, DAG, Opc, *this);
2655 case ISD::FP_TO_SINT:
2656 case ISD::FP_TO_UINT:
2657 return LowerFP_TO_INT(Op, DAG, *this);
2659 case ISD::SINT_TO_FP:
2660 case ISD::UINT_TO_FP:
2661 return LowerINT_TO_FP(Op, DAG, *this);
2663 // Vector-related lowering.
2664 case ISD::BUILD_VECTOR:
2665 return LowerBUILD_VECTOR(Op, DAG);
2666 case ISD::SCALAR_TO_VECTOR:
2667 return LowerSCALAR_TO_VECTOR(Op, DAG);
2668 case ISD::VECTOR_SHUFFLE:
2669 return LowerVECTOR_SHUFFLE(Op, DAG);
2670 case ISD::EXTRACT_VECTOR_ELT:
2671 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2672 case ISD::INSERT_VECTOR_ELT:
2673 return LowerINSERT_VECTOR_ELT(Op, DAG);
2675 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);
2681 // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == EVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
2689 case ISD::SELECT_CC:
2690 return LowerSELECT_CC(Op, DAG, *this);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);
  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  }

  return SDValue();
}
2702 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) {
2707 unsigned Opc = (unsigned) N->getOpcode();
2708 EVT OpVT = N->getValueType(0);
  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2713 cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
  }
  }

  /* Otherwise, return unchanged */
}
2725 //===----------------------------------------------------------------------===//
2726 // Target Optimization Hooks
2727 //===----------------------------------------------------------------------===//
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
2733 TargetMachine &TM = getTargetMachine();
2735 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2736 SelectionDAG &DAG = DCI.DAG;
2737 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2738 EVT NodeVT = N->getValueType(0); // The node's value type
2739 EVT Op0VT = Op0.getValueType(); // The first operand's result
2740 SDValue Result; // Initially, empty result
2741 DebugLoc dl = N->getDebugLoc();
2743 switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);
2748 if (Op0.getOpcode() == SPUISD::IndirectAddr
2749 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2750 // Normalize the operands to reduce repeated code
2751 SDValue IndirectArg = Op0, AddArg = Op1;
      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }
2758 if (isa<ConstantSDNode>(AddArg)) {
2759 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2760 SDValue IndOp1 = IndirectArg.getOperand(1);
2762 if (CN0->isNullValue()) {
2763 // (add (SPUindirect <arg>, <arg>), 0) ->
2764 // (SPUindirect <arg>, <arg>)
2766 #if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
2775 } else if (isa<ConstantSDNode>(IndOp1)) {
2776 // (add (SPUindirect <arg>, <const>), <const>) ->
2777 // (SPUindirect <arg>, <const + const>)
2778 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2779 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2780 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2782 #if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2786 << "), " << CN0->getSExtValue() << ")\n"
2787 << "With: (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

2792 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
2799 case ISD::SIGN_EXTEND:
2800 case ISD::ZERO_EXTEND:
2801 case ISD::ANY_EXTEND: {
2802 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2803 // (any_extend (SPUextract_elt0 <arg>)) ->
2804 // (SPUextract_elt0 <arg>)
2805 // Types must match, however...
2806 #if !defined(NDEBUG)
2807 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
2820 case SPUISD::IndirectAddr: {
2821 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2822 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2823 if (CN != 0 && CN->getZExtValue() == 0) {
2824 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2825 // (SPUaform <addr>, 0)
2827 DEBUG(cerr << "Replace: ");
2828 DEBUG(N->dump(&DAG));
2829 DEBUG(cerr << "\nWith: ");
2830 DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
2835 } else if (Op0.getOpcode() == ISD::ADD) {
2836 SDValue Op1 = N->getOperand(1);
2837 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2838 // (SPUindirect (add <arg>, <arg>), 0) ->
2839 // (SPUindirect <arg>, <arg>)
2840 if (CN1->isNullValue()) {
2842 #if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

2850 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
2857 case SPUISD::SHLQUAD_L_BITS:
2858 case SPUISD::SHLQUAD_L_BYTES:
2859 case SPUISD::VEC_SHL:
2860 case SPUISD::VEC_SRL:
2861 case SPUISD::VEC_SRA:
2862 case SPUISD::ROTBYTES_LEFT: {
2863 SDValue Op1 = N->getOperand(1);
2865 // Kill degenerate vector shifts:
2866 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
2873 case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
2878 case ISD::ZERO_EXTEND:
2879 case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
2883 SDValue Op00 = Op0.getOperand(0);
2884 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2885 SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
2892 case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
2905 if (Result.getNode()) {
2906 DEBUG(cerr << "\nReplace.SPU: ");
2907 DEBUG(N->dump(&DAG));
2908 DEBUG(cerr << "\nWith: ");
2909 DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }

  return Result;
}
2917 //===----------------------------------------------------------------------===//
2918 // Inline Assembly Support
2919 //===----------------------------------------------------------------------===//
2921 /// getConstraintType - Given a constraint letter, return the type of
2922 /// constraint it is for this target.
2923 SPUTargetLowering::ConstraintType
2924 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2925 if (ConstraintLetter.size() == 1) {
2926 switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}
2939 std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
2943 if (Constraint.size() == 1) {
2944 // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == EVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == EVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == EVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
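// Hypothetical usage sketch (SPU user code, not part of this file): with the
// mappings above, an "r"-constrained i32 inline asm operand is allocated
// from R32C, e.g.
//
//   int add_one(int a) {
//     int r;
//     asm("ai %0, %1, 1" : "=r"(r) : "r"(a));  // 'r' -> SPU::R32CRegisterClass
//     return r;
//   }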
2965 //! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
2972 unsigned Depth ) const {
2974 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;

2984 case SPUISD::PREFSLOT2VEC:
2985 case SPUISD::LDRESULT:
2986 case SPUISD::VEC2PREFSLOT:
2987 case SPUISD::SHLQUAD_L_BITS:
2988 case SPUISD::SHLQUAD_L_BYTES:
2989 case SPUISD::VEC_SHL:
2990 case SPUISD::VEC_SRL:
2991 case SPUISD::VEC_SRA:
2992 case SPUISD::VEC_ROTL:
2993 case SPUISD::VEC_ROTR:
2994 case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
    break;
  }
}
unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3003 unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    EVT VT = Op.getValueType();

    if (VT != EVT::i8 && VT != EVT::i16 && VT != EVT::i32) {
      VT = EVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
3024 std::vector<SDValue> &Ops,
3025 SelectionDAG &DAG) const {
3026 // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}
3031 /// isLegalAddressImmediate - Return true if the integer value can be used
3032 /// as the offset of the target addressing mode.
3033 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3034 const Type *Ty) const {
3035 // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}
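// For reference: (1 << 18) == 262144, so the accepted immediates lie strictly
// between -262144 and 262143, matching the SPU's 256K local store range.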
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}