2 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/Constants.h"
19 #include "llvm/Function.h"
20 #include "llvm/Intrinsics.h"
21 #include "llvm/CallingConv.h"
22 #include "llvm/CodeGen/CallingConvLower.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29 #include "llvm/Target/TargetOptions.h"
30 #include "llvm/ADT/VectorExtras.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/MathExtras.h"
34 #include "llvm/Support/raw_ostream.h"
39 // Used in getTargetNodeName() below
// NOTE(review): file-scope mutable map, lazily populated on the first call to
// getTargetNodeName(). Not thread-safe; acceptable for this legacy backend.
41 std::map<unsigned, const char *> node_names;

43 //! EVT mapping to useful data for Cell SPU
// NOTE(review): the struct body and the table initializers are not visible in
// this excerpt; based on the uses below (vtm->prefslot_byte, .valtype) each
// entry presumably maps an EVT to its preferred-slot byte offset within a
// 16-byte quadword -- TODO confirm against the full file.
44 struct valtype_map_s {
49 const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map, computed from the array's static size.
60 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
// Look up the valtype_map entry for VT by linear scan over the table.
// Returns the matching entry; the visible tail reports a fatal error when no
// entry exists (presumably under a debug-only guard -- the intervening lines,
// including the `msg` declaration and the return, are missing from this
// excerpt; TODO confirm against the full file).
62 const valtype_map_s *getValueTypeMapEntry(EVT VT) {
63 const valtype_map_s *retval = 0;
65 for (size_t i = 0; i < n_valtype_map; ++i) {
66 if (valtype_map[i].valtype == VT) {
67 retval = valtype_map + i;
75 raw_string_ostream Msg(msg);
76 Msg << "getValueTypeMapEntry returns NULL for "
78 llvm_report_error(Msg.str());
85 //! Expand a library call into an actual call DAG node
88 This code is taken from SelectionDAGLegalize, since it is not exposed as
89 part of the LLVM SelectionDAG API.
// NOTE(review): the function's return-type line and several interior lines
// (e.g. the Entry.Ty assignment, loop closing) are missing from this excerpt.
// The visible flow: build an ArgListTy from Op's operands, resolve the
// libcall symbol, and lower it via TLI.LowerCallTo, returning the call's
// result value (CallInfo.first). The out-parameter `Hi` is never assigned in
// the visible lines -- TODO confirm its use against the full file.
93 ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
94 bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
95 // The input chain to this libcall is the entry node of the function.
96 // Legalizing the call will automatically add the previous call to the
98 SDValue InChain = DAG.getEntryNode();
100 TargetLowering::ArgListTy Args;
101 TargetLowering::ArgListEntry Entry;
// One ArgListEntry per operand; sign/zero-extension flags follow isSigned.
102 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
103 EVT ArgVT = Op.getOperand(i).getValueType();
104 const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
105 Entry.Node = Op.getOperand(i);
107 Entry.isSExt = isSigned;
108 Entry.isZExt = !isSigned;
109 Args.push_back(Entry);
111 SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
114 // Splice the libcall in wherever FindInputOutputChains tells us to.
116 Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
117 std::pair<SDValue, SDValue> CallInfo =
118 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
119 0, TLI.getLibcallCallingConv(LC), false,
120 /*isReturnValueUsed=*/true,
121 Callee, Args, DAG, Op.getDebugLoc());
// CallInfo.first is the call's return value; .second (the chain) is dropped.
123 return CallInfo.first;
// Constructor: declares the Cell SPU's register classes and configures how
// each ISD operation is legalized (Legal / Custom / Promote / Expand).
// NOTE(review): several interior lines of this constructor (the remainder of
// the member-initializer list, loop closings) are missing from this excerpt;
// the visible code is kept byte-identical.
127 SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
128 : TargetLowering(TM, new TargetLoweringObjectFileELF()),
130 // Fold away setcc operations if possible.
133 // Use _setjmp/_longjmp instead of setjmp/longjmp.
134 setUseUnderscoreSetJmp(true);
135 setUseUnderscoreLongJmp(true);
137 // Set RTLIB libcall names as used by SPU:
138 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
140 // Set up the SPU's register classes:
141 addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
142 addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
143 addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
144 addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
145 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
146 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
147 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
149 // SPU has no sign or zero extended loads for i1, i8, i16:
150 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
151 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
152 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
154 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
155 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
// No truncating stores from i128 or f64 -- expand them all.
157 setTruncStoreAction(MVT::i128, MVT::i64, Expand);
158 setTruncStoreAction(MVT::i128, MVT::i32, Expand);
159 setTruncStoreAction(MVT::i128, MVT::i16, Expand);
160 setTruncStoreAction(MVT::i128, MVT::i8, Expand);
162 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
164 // SPU constant load actions are custom lowered:
165 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
166 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
168 // SPU's loads and stores have to be custom lowered:
// (all integer types from i8 up to, but not including, i128)
169 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
171 MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
173 setOperationAction(ISD::LOAD, VT, Custom);
174 setOperationAction(ISD::STORE, VT, Custom);
175 setLoadExtAction(ISD::EXTLOAD, VT, Custom);
176 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
177 setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
// Expand truncating stores from VT to every narrower integer type.
179 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
180 MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
181 setTruncStoreAction(VT, StoreVT, Expand);
// Same treatment for the FP types (f32 only, given the exclusive bound).
185 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
187 MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
189 setOperationAction(ISD::LOAD, VT, Custom);
190 setOperationAction(ISD::STORE, VT, Custom);
192 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
193 MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
194 setTruncStoreAction(VT, StoreVT, Expand);
198 // Expand the jumptable branches
199 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
200 setOperationAction(ISD::BR_CC, MVT::Other, Expand);
202 // Custom lower SELECT_CC for most cases, but expand by default
203 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
204 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
205 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
206 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
207 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
209 // SPU has no intrinsics for these particular operations:
210 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
212 // SPU has no division/remainder instructions
213 setOperationAction(ISD::SREM, MVT::i8, Expand);
214 setOperationAction(ISD::UREM, MVT::i8, Expand);
215 setOperationAction(ISD::SDIV, MVT::i8, Expand);
216 setOperationAction(ISD::UDIV, MVT::i8, Expand);
217 setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
218 setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
219 setOperationAction(ISD::SREM, MVT::i16, Expand);
220 setOperationAction(ISD::UREM, MVT::i16, Expand);
221 setOperationAction(ISD::SDIV, MVT::i16, Expand);
222 setOperationAction(ISD::UDIV, MVT::i16, Expand);
223 setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
224 setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
225 setOperationAction(ISD::SREM, MVT::i32, Expand);
226 setOperationAction(ISD::UREM, MVT::i32, Expand);
227 setOperationAction(ISD::SDIV, MVT::i32, Expand);
228 setOperationAction(ISD::UDIV, MVT::i32, Expand);
229 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
230 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
231 setOperationAction(ISD::SREM, MVT::i64, Expand);
232 setOperationAction(ISD::UREM, MVT::i64, Expand);
233 setOperationAction(ISD::SDIV, MVT::i64, Expand);
234 setOperationAction(ISD::UDIV, MVT::i64, Expand);
235 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
236 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
237 setOperationAction(ISD::SREM, MVT::i128, Expand);
238 setOperationAction(ISD::UREM, MVT::i128, Expand);
239 setOperationAction(ISD::SDIV, MVT::i128, Expand);
240 setOperationAction(ISD::UDIV, MVT::i128, Expand);
241 setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
242 setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
244 // We don't support sin/cos/sqrt/fmod
245 setOperationAction(ISD::FSIN , MVT::f64, Expand);
246 setOperationAction(ISD::FCOS , MVT::f64, Expand);
247 setOperationAction(ISD::FREM , MVT::f64, Expand);
248 setOperationAction(ISD::FSIN , MVT::f32, Expand);
249 setOperationAction(ISD::FCOS , MVT::f32, Expand);
250 setOperationAction(ISD::FREM , MVT::f32, Expand);
252 // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
254 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
255 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
257 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
258 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
260 // SPU can do rotate right and left, so legalize it... but customize for i8
261 // because instructions don't exist.
263 // FIXME: Change from "expand" to appropriate type once ROTR is supported in
265 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
266 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
267 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
269 setOperationAction(ISD::ROTL, MVT::i32, Legal);
270 setOperationAction(ISD::ROTL, MVT::i16, Legal);
271 setOperationAction(ISD::ROTL, MVT::i8, Custom);
273 // SPU has no native version of shift left/right for i8
274 setOperationAction(ISD::SHL, MVT::i8, Custom);
275 setOperationAction(ISD::SRL, MVT::i8, Custom);
276 setOperationAction(ISD::SRA, MVT::i8, Custom);
278 // Make these operations legal and handle them during instruction selection:
279 setOperationAction(ISD::SHL, MVT::i64, Legal);
280 setOperationAction(ISD::SRL, MVT::i64, Legal);
281 setOperationAction(ISD::SRA, MVT::i64, Legal);
283 // Custom lower i8, i32 and i64 multiplications
284 setOperationAction(ISD::MUL, MVT::i8, Custom);
285 setOperationAction(ISD::MUL, MVT::i32, Legal);
286 setOperationAction(ISD::MUL, MVT::i64, Legal);
288 // Expand double-width multiplication
289 // FIXME: It would probably be reasonable to support some of these operations
290 setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
291 setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
292 setOperationAction(ISD::MULHU, MVT::i8, Expand);
293 setOperationAction(ISD::MULHS, MVT::i8, Expand);
294 setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
295 setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
296 setOperationAction(ISD::MULHU, MVT::i16, Expand);
297 setOperationAction(ISD::MULHS, MVT::i16, Expand);
298 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
299 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
300 setOperationAction(ISD::MULHU, MVT::i32, Expand);
301 setOperationAction(ISD::MULHS, MVT::i32, Expand);
302 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
303 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
304 setOperationAction(ISD::MULHU, MVT::i64, Expand);
305 setOperationAction(ISD::MULHS, MVT::i64, Expand);
307 // Need to custom handle (some) common i8, i64 math ops
308 setOperationAction(ISD::ADD, MVT::i8, Custom);
309 setOperationAction(ISD::ADD, MVT::i64, Legal);
310 setOperationAction(ISD::SUB, MVT::i8, Custom);
311 setOperationAction(ISD::SUB, MVT::i64, Legal);
313 // SPU does not have BSWAP. It does have i32 support CTLZ.
314 // CTPOP has to be custom lowered.
315 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
316 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
318 setOperationAction(ISD::CTPOP, MVT::i8, Custom);
319 setOperationAction(ISD::CTPOP, MVT::i16, Custom);
320 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
321 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
322 setOperationAction(ISD::CTPOP, MVT::i128, Expand);
324 setOperationAction(ISD::CTTZ , MVT::i8, Expand);
325 setOperationAction(ISD::CTTZ , MVT::i16, Expand);
326 setOperationAction(ISD::CTTZ , MVT::i32, Expand);
327 setOperationAction(ISD::CTTZ , MVT::i64, Expand);
328 setOperationAction(ISD::CTTZ , MVT::i128, Expand);
330 setOperationAction(ISD::CTLZ , MVT::i8, Promote);
331 setOperationAction(ISD::CTLZ , MVT::i16, Promote);
332 setOperationAction(ISD::CTLZ , MVT::i32, Legal);
333 setOperationAction(ISD::CTLZ , MVT::i64, Expand);
334 setOperationAction(ISD::CTLZ , MVT::i128, Expand);
336 // SPU has a version of select that implements (a&~c)|(b&c), just like
337 // select ought to work:
338 setOperationAction(ISD::SELECT, MVT::i8, Legal);
339 setOperationAction(ISD::SELECT, MVT::i16, Legal);
340 setOperationAction(ISD::SELECT, MVT::i32, Legal);
341 setOperationAction(ISD::SELECT, MVT::i64, Legal);
343 setOperationAction(ISD::SETCC, MVT::i8, Legal);
344 setOperationAction(ISD::SETCC, MVT::i16, Legal);
345 setOperationAction(ISD::SETCC, MVT::i32, Legal);
346 setOperationAction(ISD::SETCC, MVT::i64, Legal);
347 setOperationAction(ISD::SETCC, MVT::f64, Custom);
349 // Custom lower i128 -> i64 truncates
350 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
352 // Custom lower i32/i64 -> i128 sign extend
353 setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
355 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
356 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
357 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
358 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
359 // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
360 // to expand to a libcall, hence the custom lowering:
361 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
362 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
// NOTE(review): the i64 FP_TO_SINT / SINT_TO_FP actions set to Expand/Custom
// here are overridden to Custom further down (orig lines 413-414); the last
// call wins, so these earlier settings are dead. Worth consolidating.
363 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
364 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
365 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
366 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
368 // FDIV on SPU requires custom lowering
369 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
371 // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
372 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
373 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
374 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
375 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
376 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
377 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
378 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
379 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
381 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
382 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
383 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
384 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
386 // We cannot sextinreg(i1). Expand to shifts.
387 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
389 // We want to legalize GlobalAddress and ConstantPool nodes into the
390 // appropriate instructions to materialize the address.
391 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
393 MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
395 setOperationAction(ISD::GlobalAddress, VT, Custom);
396 setOperationAction(ISD::ConstantPool, VT, Custom);
397 setOperationAction(ISD::JumpTable, VT, Custom);
400 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
401 setOperationAction(ISD::VASTART , MVT::Other, Custom);
403 // Use the default implementation.
404 setOperationAction(ISD::VAARG , MVT::Other, Expand);
405 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
406 setOperationAction(ISD::VAEND , MVT::Other, Expand);
407 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
408 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
409 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
410 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
412 // Cell SPU has instructions for converting between i64 and fp.
413 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
414 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
416 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
// NOTE(review): this also overrides the earlier Custom setting for i32
// FP_TO_UINT (orig line 362) -- confirm the intended final action.
417 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
419 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
420 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
422 // First set operation action for all vector types to expand. Then we
423 // will selectively turn on ones that can be effectively codegen'd.
424 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
425 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
426 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
427 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
428 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
429 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
431 // "Odd size" vector classes that we're willing to support:
432 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
434 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
435 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
436 MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
438 // add/sub are legal for all supported vector VT's.
439 setOperationAction(ISD::ADD, VT, Legal);
440 setOperationAction(ISD::SUB, VT, Legal);
// NOTE(review): comment says "custom lowered" but the action is Legal --
// one of the two is stale; confirm against SPUISelDAGToDAG handling.
441 // mul has to be custom lowered.
442 setOperationAction(ISD::MUL, VT, Legal);
444 setOperationAction(ISD::AND, VT, Legal);
445 setOperationAction(ISD::OR, VT, Legal);
446 setOperationAction(ISD::XOR, VT, Legal);
447 setOperationAction(ISD::LOAD, VT, Legal);
448 setOperationAction(ISD::SELECT, VT, Legal);
449 setOperationAction(ISD::STORE, VT, Legal);
451 // These operations need to be expanded:
452 setOperationAction(ISD::SDIV, VT, Expand);
453 setOperationAction(ISD::SREM, VT, Expand);
454 setOperationAction(ISD::UDIV, VT, Expand);
455 setOperationAction(ISD::UREM, VT, Expand);
457 // Custom lower build_vector, constant pool spills, insert and
458 // extract vector elements:
459 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
460 setOperationAction(ISD::ConstantPool, VT, Custom);
461 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
462 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
463 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
464 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
467 setOperationAction(ISD::AND, MVT::v16i8, Custom);
468 setOperationAction(ISD::OR, MVT::v16i8, Custom);
469 setOperationAction(ISD::XOR, MVT::v16i8, Custom);
470 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
472 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
474 setShiftAmountType(MVT::i32);
475 setBooleanContents(ZeroOrNegativeOneBooleanContent);
477 setStackPointerRegisterToSaveRestore(SPU::R1);
479 // We have target-specific dag combine patterns for the following nodes:
480 setTargetDAGCombine(ISD::ADD);
481 setTargetDAGCombine(ISD::ZERO_EXTEND);
482 setTargetDAGCombine(ISD::SIGN_EXTEND);
483 setTargetDAGCombine(ISD::ANY_EXTEND);
485 computeRegisterProperties();
487 // Set pre-RA register scheduler default to BURR, which produces slightly
488 // better code than the default (could also be TDRR, but TargetLowering.h
489 // needs a mod to support that model):
490 setSchedulingPreference(SchedulingForRegPressure);
// Return a human-readable name for an SPUISD target node opcode, or null if
// unknown. Lazily fills the file-scope node_names table on first use.
// NOTE(review): the return-type line and some closing braces are missing from
// this excerpt; code kept byte-identical.
494 SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
496 if (node_names.empty()) {
497 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
498 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
499 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
500 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
501 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
502 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
503 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
504 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
505 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
506 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
507 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
508 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
509 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
510 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
511 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
512 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
513 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
514 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
515 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
516 "SPUISD::ROTBYTES_LEFT_BITS";
517 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
518 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
519 node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
520 node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
521 node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
// Null is returned for opcodes without a registered name.
524 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
526 return ((i != node_names.end()) ? i->second : 0);
529 /// getFunctionAlignment - Return the Log2 alignment of this function.
// NOTE(review): the function body (the returned constant) is not visible in
// this excerpt -- confirm the alignment value against the full file.
530 unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
534 //===----------------------------------------------------------------------===//
535 // Return the Cell SPU's SETCC result type
536 //===----------------------------------------------------------------------===//
// For i8/i16/i32 operands the SETCC result type mirrors the operand type;
// the fallback value for other types is on a line missing from this excerpt
// (presumably MVT::i32 -- TODO confirm against the full file).
538 MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
539 // i16 and i32 are valid SETCC result types
540 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
541 VT.getSimpleVT().SimpleTy :
545 //===----------------------------------------------------------------------===//
546 // Calling convention code:
547 //===----------------------------------------------------------------------===//
549 #include "SPUGenCallingConv.inc"
551 //===----------------------------------------------------------------------===//
552 // LowerOperation implementation
553 //===----------------------------------------------------------------------===//
555 /// Custom lower loads for CellSPU
557 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
558 within a 16-byte block, we have to rotate to extract the requested element.
560 For extending loads, we also want to ensure that the following sequence is
561 emitted, e.g. for MVT::f32 extending load to MVT::f64:
565 %2 v16i8,ch = rotate %1
566 %3 v4f8, ch = bitconvert %2
567 %4 f32 = vec2perfslot %3
568 %5 f64 = fp_extend %4
// NOTE(review): the return-type line, several declarations (CN, rotate,
// result, Flag) and closing braces are missing from this excerpt; the code
// below is kept byte-identical.
572 LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
573 LoadSDNode *LN = cast<LoadSDNode>(Op);
574 SDValue the_chain = LN->getChain();
575 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
576 EVT InVT = LN->getMemoryVT();
577 EVT OutVT = Op.getValueType();
578 ISD::LoadExtType ExtType = LN->getExtensionType();
579 unsigned alignment = LN->getAlignment();
580 const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
581 DebugLoc dl = Op.getDebugLoc();
583 switch (LN->getAddressingMode()) {
584 case ISD::UNINDEXED: {
586 SDValue basePtr = LN->getBasePtr();
// Aligned case: derive a constant (or simplified) rotate amount.
589 if (alignment == 16) {
592 // Special cases for a known aligned load to simplify the base pointer
593 // and the rotation amount:
594 if (basePtr.getOpcode() == ISD::ADD
595 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
596 // Known offset into basePtr
597 int64_t offset = CN->getSExtValue();
598 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
603 rotate = DAG.getConstant(rotamt, MVT::i16);
605 // Simplify the base pointer for this case:
606 basePtr = basePtr.getOperand(0);
607 if ((offset & ~0xf) > 0) {
608 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
610 DAG.getConstant((offset & ~0xf), PtrVT));
612 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
613 || (basePtr.getOpcode() == SPUISD::IndirectAddr
614 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
615 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
616 // Plain aligned a-form address: rotate into preferred slot
617 // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
618 int64_t rotamt = -vtm->prefslot_byte;
621 rotate = DAG.getConstant(rotamt, MVT::i16);
623 // Offset the rotate amount by the basePtr and the preferred slot
625 int64_t rotamt = -vtm->prefslot_byte;
628 rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
630 DAG.getConstant(rotamt, PtrVT));
633 // Unaligned load: must be more pessimistic about addressing modes:
634 if (basePtr.getOpcode() == ISD::ADD) {
635 MachineFunction &MF = DAG.getMachineFunction();
636 MachineRegisterInfo &RegInfo = MF.getRegInfo();
637 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
640 SDValue Op0 = basePtr.getOperand(0);
641 SDValue Op1 = basePtr.getOperand(1);
643 if (isa<ConstantSDNode>(Op1)) {
644 // Convert the (add <ptr>, <const>) to an indirect address contained
645 // in a register. Note that this is done because we need to avoid
646 // creating a 0(reg) d-form address due to the SPU's block loads.
647 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
648 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
649 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
651 // Convert the (add <arg1>, <arg2>) to an indirect address, which
652 // will likely be lowered as a reg(reg) x-form address.
653 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
656 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
658 DAG.getConstant(0, PtrVT));
661 // Offset the rotate amount by the basePtr and the preferred slot
663 rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
665 DAG.getConstant(-vtm->prefslot_byte, PtrVT));
668 // Re-emit as a v16i8 vector load
669 result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
670 LN->getSrcValue(), LN->getSrcValueOffset(),
671 LN->isVolatile(), LN->isNonTemporal(), 16);
674 the_chain = result.getValue(1);
676 // Rotate into the preferred slot:
677 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
678 result.getValue(0), rotate);
680 // Convert the loaded v16i8 vector to the appropriate vector type
681 // specified by the operand:
682 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
683 InVT, (128 / InVT.getSizeInBits()));
684 result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
685 DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));
687 // Handle extending loads by extending the scalar result:
688 if (ExtType == ISD::SEXTLOAD) {
689 result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
690 } else if (ExtType == ISD::ZEXTLOAD) {
691 result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
692 } else if (ExtType == ISD::EXTLOAD) {
693 unsigned NewOpc = ISD::ANY_EXTEND;
695 if (OutVT.isFloatingPoint())
696 NewOpc = ISD::FP_EXTEND;
698 result = DAG.getNode(NewOpc, dl, OutVT, result);
// Wrap result + chain in an LDRESULT node so the chain is preserved.
701 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
702 SDValue retops[2] = {
707 result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
708 retops, sizeof(retops) / sizeof(retops[0]));
// All indexed addressing modes are unsupported: report a fatal error.
715 case ISD::LAST_INDEXED_MODE:
718 raw_string_ostream Msg(msg);
719 Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
721 Msg << (unsigned) LN->getAddressingMode();
722 llvm_report_error(Msg.str());
730 /// Custom lower stores for CellSPU
732 All CellSPU stores are aligned to 16-byte boundaries, so for elements
733 within a 16-byte block, we have to generate a shuffle to insert the
734 requested element into its place, then store the resulting block.
// NOTE(review): the return-type line, several declarations (CN, Flag,
// result) and closing braces are missing from this excerpt; code kept
// byte-identical.
737 LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
738 StoreSDNode *SN = cast<StoreSDNode>(Op);
739 SDValue Value = SN->getValue();
740 EVT VT = Value.getValueType();
741 EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
742 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
743 DebugLoc dl = Op.getDebugLoc();
744 unsigned alignment = SN->getAlignment();
746 switch (SN->getAddressingMode()) {
747 case ISD::UNINDEXED: {
748 // The vector type we really want to load from the 16-byte chunk.
749 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
750 VT, (128 / VT.getSizeInBits()));
752 SDValue alignLoadVec;
753 SDValue basePtr = SN->getBasePtr();
754 SDValue the_chain = SN->getChain();
755 SDValue insertEltOffs;
// Aligned case: split the address into a 16-byte-aligned base and an
// insertion byte offset within the quadword.
757 if (alignment == 16) {
760 // Special cases for a known aligned load to simplify the base pointer
761 // and insertion byte:
762 if (basePtr.getOpcode() == ISD::ADD
763 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
764 // Known offset into basePtr
765 int64_t offset = CN->getSExtValue();
767 // Simplify the base pointer for this case:
768 basePtr = basePtr.getOperand(0);
769 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
771 DAG.getConstant((offset & 0xf), PtrVT));
773 if ((offset & ~0xf) > 0) {
774 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
776 DAG.getConstant((offset & ~0xf), PtrVT));
779 // Otherwise, assume it's at byte 0 of basePtr
780 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
782 DAG.getConstant(0, PtrVT));
785 // Unaligned load: must be more pessimistic about addressing modes:
786 if (basePtr.getOpcode() == ISD::ADD) {
787 MachineFunction &MF = DAG.getMachineFunction();
788 MachineRegisterInfo &RegInfo = MF.getRegInfo();
789 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
792 SDValue Op0 = basePtr.getOperand(0);
793 SDValue Op1 = basePtr.getOperand(1);
795 if (isa<ConstantSDNode>(Op1)) {
796 // Convert the (add <ptr>, <const>) to an indirect address contained
797 // in a register. Note that this is done because we need to avoid
798 // creating a 0(reg) d-form address due to the SPU's block loads.
799 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
800 the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
801 basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
803 // Convert the (add <arg1>, <arg2>) to an indirect address, which
804 // will likely be lowered as a reg(reg) x-form address.
805 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
808 basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
810 DAG.getConstant(0, PtrVT));
813 // Insertion point is solely determined by basePtr's contents
814 insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
816 DAG.getConstant(0, PtrVT));
// Load the containing 16-byte block, splice the value in via a shuffle,
// then store the whole block back.
819 // Re-emit as a v16i8 vector load
820 alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
821 SN->getSrcValue(), SN->getSrcValueOffset(),
822 SN->isVolatile(), SN->isNonTemporal(), 16);
825 the_chain = alignLoadVec.getValue(1);
827 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
828 SDValue theValue = SN->getValue();
832 && (theValue.getOpcode() == ISD::AssertZext
833 || theValue.getOpcode() == ISD::AssertSext)) {
834 // Drill down and get the value for zero- and sign-extended
836 theValue = theValue.getOperand(0);
839 // If the base pointer is already a D-form address, then just create
840 // a new D-form address with a slot offset and the orignal base pointer.
841 // Otherwise generate a D-form address with the slot offset relative
842 // to the stack pointer, which is always aligned.
844 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
845 errs() << "CellSPU LowerSTORE: basePtr = ";
846 basePtr.getNode()->dump(&DAG);
851 SDValue insertEltOp =
852 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
853 SDValue vectorizeOp =
854 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
856 result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
857 vectorizeOp, alignLoadVec,
858 DAG.getNode(ISD::BIT_CONVERT, dl,
859 MVT::v4i32, insertEltOp));
861 result = DAG.getStore(the_chain, dl, result, basePtr,
862 LN->getSrcValue(), LN->getSrcValueOffset(),
863 LN->isVolatile(), LN->isNonTemporal(),
// NOTE(review): dead debug block; "¤tRoot" is encoding corruption of
// "&currentRoot" (the '&curren' sequence was mangled into an HTML entity).
866 #if 0 && !defined(NDEBUG)
867 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
868 const SDValue &currentRoot = DAG.getRoot();
871 errs() << "------- CellSPU:LowerStore result:\n";
873 errs() << "-------\n";
874 DAG.setRoot(currentRoot);
// NOTE(review): copy-paste from LowerLOAD -- the message should read
// "LowerSTORE: Got a StoreSDNode ...". Left as-is in this comment-only pass.
885 case ISD::LAST_INDEXED_MODE:
888 raw_string_ostream Msg(msg);
889 Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
891 Msg << (unsigned) SN->getAddressingMode();
892 llvm_report_error(Msg.str());
900 //! Generate the address of a constant pool entry.
// Lower an ISD::ConstantPool node to a CellSPU address node.  Small-memory
// targets get a single A-form (absolute) address; the Hi/Lo pair feeding an
// IndirectAddr is the large-memory path.
// NOTE(review): gapped listing — the "static SDValue" return-type line, the
// "} else {" between the two paths, and the closing braces are missing here.
902 LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
903 EVT PtrVT = Op.getValueType();
904 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
905 Constant *C = CP->getConstVal();
906 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
907 SDValue Zero = DAG.getConstant(0, PtrVT);
908 const TargetMachine &TM = DAG.getTarget();
909 // FIXME there is no actual debug info here
910 DebugLoc dl = Op.getDebugLoc();
912 if (TM.getRelocationModel() == Reloc::Static) {
913 if (!ST->usingLargeMem()) {
914 // Just return the SDValue with the constant pool address in it.
915 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
// Large-memory mode: build the address from high/low halves (else branch of
// the usingLargeMem() test; the brace line is not visible in this listing).
917 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
918 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
919 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
// Only the static relocation model is supported for SPU constant pools.
923 llvm_unreachable("LowerConstantPool: Relocation model other than static"
928 //! Alternate entry point for generating the address of a constant pool entry
// Public entry point: forwards to the file-local ::LowerConstantPool above,
// extracting the subtarget from the target machine.
930 SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
931 return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
// Lower an ISD::JumpTable node; mirrors LowerConstantPool (A-form address
// for small memory, Hi/Lo + IndirectAddr for large memory, static reloc only).
// NOTE(review): gapped listing — return-type line, "} else {", and closing
// braces are not visible here.
935 LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
936 EVT PtrVT = Op.getValueType();
937 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
938 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
939 SDValue Zero = DAG.getConstant(0, PtrVT);
940 const TargetMachine &TM = DAG.getTarget();
941 // FIXME there is no actual debug info here
942 DebugLoc dl = Op.getDebugLoc();
944 if (TM.getRelocationModel() == Reloc::Static) {
945 if (!ST->usingLargeMem()) {
946 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
// Large-memory path (else branch; brace line missing from listing):
948 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
949 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
950 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
954 llvm_unreachable("LowerJumpTable: Relocation model other than static"
// Lower an ISD::GlobalAddress node; same small/large-memory split as the
// constant-pool and jump-table lowerings above.  Unlike those, the non-static
// case reports a recoverable error (llvm_report_error) rather than asserting.
// NOTE(review): gapped listing — return-type line, "} else {", and closing
// braces are not visible here.
960 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
961 EVT PtrVT = Op.getValueType();
962 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
963 GlobalValue *GV = GSDN->getGlobal();
964 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
965 const TargetMachine &TM = DAG.getTarget();
966 SDValue Zero = DAG.getConstant(0, PtrVT);
967 // FIXME there is no actual debug info here
968 DebugLoc dl = Op.getDebugLoc();
970 if (TM.getRelocationModel() == Reloc::Static) {
971 if (!ST->usingLargeMem()) {
972 return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
// Large-memory path (else branch; brace line missing from listing):
974 SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
975 SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
976 return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
979 llvm_report_error("LowerGlobalAddress: Relocation model other than static"
987 //! Custom lower double precision floating point constants
// Custom-lower an f64 ConstantFP: reinterpret the double's bit pattern as an
// i64, splat it into a v2i64 BUILD_VECTOR, bitcast to v2f64, and extract the
// preferred slot.  This avoids an FP constant-pool load.
// NOTE(review): gapped listing — return-type line, the assert's first line
// (orig. 996-997), and the non-f64 fallthrough/closing braces are missing.
989 LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
990 EVT VT = Op.getValueType();
991 // FIXME there is no actual debug info here
992 DebugLoc dl = Op.getDebugLoc();
994 if (VT == MVT::f64) {
995 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
998 "LowerConstantFP: Node is not ConstantFPSDNode");
// Pretend the constant is an integer: splat the raw bits across both lanes.
1000 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1001 SDValue T = DAG.getConstant(dbits, MVT::i64);
1002 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1003 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1004 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
// Lower incoming formal arguments: the first NumArgRegs arguments arrive in
// the SPU argument registers (R3..R79, per SPURegisterInfo::getArgRegs); the
// rest are loaded from fixed stack slots starting at minStackSize().  For
// varargs, every remaining argument register is spilled to the stack so
// va_arg can walk them.
// NOTE(review): gapped listing — the "SDValue" return-type line, the switch's
// "case MVT::..." labels, several "} else {" lines, and the trailing
// "return Chain;" are not visible here; the surviving lines are byte-exact.
1011 SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1012 CallingConv::ID CallConv, bool isVarArg,
1013 const SmallVectorImpl<ISD::InputArg>
1015 DebugLoc dl, SelectionDAG &DAG,
1016 SmallVectorImpl<SDValue> &InVals) {
1018 MachineFunction &MF = DAG.getMachineFunction();
1019 MachineFrameInfo *MFI = MF.getFrameInfo();
1020 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1022 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1023 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// ArgOffset starts just past the minimal stack frame ([LR] + [SP] area).
1025 unsigned ArgOffset = SPUFrameInfo::minStackSize();
1026 unsigned ArgRegIdx = 0;
1027 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1029 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1031 // Add DAG nodes to load the arguments or copy them out of registers.
1032 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1033 EVT ObjectVT = Ins[ArgNo].VT;
1034 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1037 if (ArgRegIdx < NumArgRegs) {
1038 const TargetRegisterClass *ArgRegClass;
// Pick the register class for this value type; unknown types are a hard
// error.  (The per-type "case MVT::..." labels are missing from the listing.)
1040 switch (ObjectVT.getSimpleVT().SimpleTy) {
1043 raw_string_ostream Msg(msg);
1044 Msg << "LowerFormalArguments Unhandled argument type: "
1045 << ObjectVT.getEVTString();
1046 llvm_report_error(Msg.str());
1049 ArgRegClass = &SPU::R8CRegClass;
1052 ArgRegClass = &SPU::R16CRegClass;
1055 ArgRegClass = &SPU::R32CRegClass;
1058 ArgRegClass = &SPU::R64CRegClass;
1061 ArgRegClass = &SPU::GPRCRegClass;
1064 ArgRegClass = &SPU::R32FPRegClass;
1067 ArgRegClass = &SPU::R64FPRegClass;
1075 ArgRegClass = &SPU::VECREGRegClass;
// Register-resident argument: bind the physical arg reg to a fresh vreg.
1079 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1080 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1081 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1084 // We need to load the argument to a virtual register if we determined
1085 // above that we ran out of physical registers of the appropriate type
1086 // or we're forced to do vararg
1087 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
1088 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1089 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
1090 ArgOffset += StackSlotSize;
1093 InVals.push_back(ArgVal);
1095 Chain = ArgVal.getOperand(0);
// vararg spill: dump every unused argument register to consecutive slots.
1100 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
1101 // We will spill (79-3)+1 registers to the stack
1102 SmallVector<SDValue, 79-3+1> MemOps;
1104 // Create the frame slot
1106 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1107 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
1109 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1110 unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
1111 SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1112 SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
1114 Chain = Store.getOperand(0);
1115 MemOps.push_back(Store);
1117 // Increment address by stack slot size for the next stored argument
1118 ArgOffset += StackSlotSize;
// Tie all spill stores together so none is dead-code-eliminated.
1120 if (!MemOps.empty())
1121 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1122 &MemOps[0], MemOps.size());
1128 /// isLSAAddress - Return the immediate to use if the specified
1129 /// value is representable as a LSA address.
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
// Returns the word-address constant (byte address >> 2) when Op is a constant
// that is 4-byte aligned and fits in a sign-extended 18-bit byte address;
// otherwise returns 0.  (The "if (!C) return 0;" guard line, orig. 1132-1133,
// is missing from this gapped listing.)
1130 static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1131 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1134 int Addr = C->getZExtValue();
1135 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1136 (Addr << 14 >> 14) != Addr)
1137 return 0; // Top 14 bits have to be sext of immediate.
1139 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
// Lower an outgoing call: pass the first NumArgRegs args in registers, spill
// the rest to the caller's outgoing-arg area, resolve the callee to a
// PC-relative (BRSL), absolute (BRASL/A-form), or indirect (large-mem X-form)
// address, emit CALLSEQ_START/SPUISD::CALL/CALLSEQ_END, and finally copy the
// return value(s) out of R3 (and R4 for a split i32 pair) into InVals.
// NOTE(review): gapped listing — the "SDValue" return-type line, case labels
// of both switches, several "} else {" lines, and the final "return Chain;"
// are not visible here; surviving lines are byte-exact.
1143 SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1144 CallingConv::ID CallConv, bool isVarArg,
1146 const SmallVectorImpl<ISD::OutputArg> &Outs,
1147 const SmallVectorImpl<ISD::InputArg> &Ins,
1148 DebugLoc dl, SelectionDAG &DAG,
1149 SmallVectorImpl<SDValue> &InVals) {
1150 // CellSPU target does not yet support tail call optimization.
1153 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1154 unsigned NumOps = Outs.size();
1155 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1156 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1157 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1159 // Handy pointer type
1160 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1162 // Set up a copy of the stack pointer for use loading and storing any
1163 // arguments that may not fit in the registers available for argument
1165 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1167 // Figure out which arguments are going to go in registers, and which in
1169 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1170 unsigned ArgRegIdx = 0;
1172 // Keep track of registers passing arguments
1173 std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1174 // And the arguments passed on the stack
1175 SmallVector<SDValue, 8> MemOpChains;
1177 for (unsigned i = 0; i != NumOps; ++i) {
1178 SDValue Arg = Outs[i].Val;
1180 // PtrOff will be used to store the current argument to the stack if a
1181 // register cannot be found for it.
1182 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1183 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// Per-type dispatch; the "case MVT::..." labels are missing from the listing,
// but each surviving arm is the same reg-else-stack pattern.
1185 switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1186 default: llvm_unreachable("Unexpected ValueType for argument!");
1192 if (ArgRegIdx != NumArgRegs) {
1193 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1195 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1197 ArgOffset += StackSlotSize;
1202 if (ArgRegIdx != NumArgRegs) {
1203 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1205 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1207 ArgOffset += StackSlotSize;
1216 if (ArgRegIdx != NumArgRegs) {
1217 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1219 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
1221 ArgOffset += StackSlotSize;
1227 // Accumulate how many bytes are to be pushed on the stack, including the
1228 // linkage area, and parameter passing area. According to the SPU ABI,
1229 // we minimally need space for [LR] and [SP].
1230 unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();
1232 // Insert a call sequence start
1233 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1236 if (!MemOpChains.empty()) {
1237 // Adjust the stack pointer for the stack arguments.
1238 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1239 &MemOpChains[0], MemOpChains.size());
1242 // Build a sequence of copy-to-reg nodes chained together with token chain
1243 // and flag operands which copy the outgoing args into the appropriate regs.
1245 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1246 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1247 RegsToPass[i].second, InFlag);
1248 InFlag = Chain.getValue(1);
1251 SmallVector<SDValue, 8> Ops;
1252 unsigned CallOpc = SPUISD::CALL;
1254 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1255 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1256 // node so that legalize doesn't hack it.
1257 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1258 GlobalValue *GV = G->getGlobal();
1259 EVT CalleeVT = Callee.getValueType();
1260 SDValue Zero = DAG.getConstant(0, PtrVT);
1261 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1263 if (!ST->usingLargeMem()) {
1264 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1265 // style calls, otherwise, external symbols are BRASL calls. This assumes
1266 // that declared/defined symbols are in the same compilation unit and can
1267 // be reached through PC-relative jumps.
1270 // This may be an unsafe assumption for JIT and really large compilation
1272 if (GV->isDeclaration()) {
1273 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1275 Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1278 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1280 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1282 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1283 EVT CalleeVT = Callee.getValueType();
1284 SDValue Zero = DAG.getConstant(0, PtrVT);
1285 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1286 Callee.getValueType());
1288 if (!ST->usingLargeMem()) {
1289 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1291 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1293 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1294 // If this is an absolute destination address that appears to be a legal
1295 // local store address, use the munged value.
1296 Callee = SDValue(Dest, 0);
1299 Ops.push_back(Chain);
1300 Ops.push_back(Callee);
1302 // Add argument registers to the end of the list so that they are known live
1304 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1305 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1306 RegsToPass[i].second.getValueType()));
1308 if (InFlag.getNode())
1309 Ops.push_back(InFlag);
1310 // Returns a chain and a flag for retval copy to use.
1311 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1312 &Ops[0], Ops.size());
1313 InFlag = Chain.getValue(1);
1315 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1316 DAG.getIntPtrConstant(0, true), InFlag);
1318 InFlag = Chain.getValue(1);
1320 // If the function returns void, just return the chain.
1324 // If the call has results, copy the values out of the ret val registers.
// Return values come back in R3 (low word in R4 for a split i32 pair).
1325 switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1326 default: llvm_unreachable("Unexpected ret value!");
1327 case MVT::Other: break;
1329 if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1330 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1331 MVT::i32, InFlag).getValue(1);
1332 InVals.push_back(Chain.getValue(0));
1333 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1334 Chain.getValue(2)).getValue(1);
1335 InVals.push_back(Chain.getValue(0));
1337 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1338 InFlag).getValue(1);
1339 InVals.push_back(Chain.getValue(0));
1343 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1344 InFlag).getValue(1);
1345 InVals.push_back(Chain.getValue(0));
1348 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1349 InFlag).getValue(1);
1350 InVals.push_back(Chain.getValue(0));
1354 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1355 InFlag).getValue(1);
1356 InVals.push_back(Chain.getValue(0));
1364 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1365 InFlag).getValue(1);
1366 InVals.push_back(Chain.getValue(0));
// Lower a function return: run the SPU return calling convention over Outs,
// register the result regs as live-outs (first return only), copy each value
// into its assigned register, and emit SPUISD::RET_FLAG (with the glue flag
// when values were copied, without it otherwise).
// NOTE(review): gapped listing — the "SDValue" return-type line, the "SDValue
// Flag;" declaration, and the "if (Flag.getNode())" guard before the two
// RET_FLAG returns are not visible here.
1374 SPUTargetLowering::LowerReturn(SDValue Chain,
1375 CallingConv::ID CallConv, bool isVarArg,
1376 const SmallVectorImpl<ISD::OutputArg> &Outs,
1377 DebugLoc dl, SelectionDAG &DAG) {
1379 SmallVector<CCValAssign, 16> RVLocs;
1380 CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1381 RVLocs, *DAG.getContext());
1382 CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1384 // If this is the first return lowered for this function, add the regs to the
1385 // liveout set for the function.
1386 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1387 for (unsigned i = 0; i != RVLocs.size(); ++i)
1388 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1393 // Copy the result values into the output registers.
1394 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1395 CCValAssign &VA = RVLocs[i];
1396 assert(VA.isRegLoc() && "Can only return in registers!");
1397 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1399 Flag = Chain.getValue(1);
1403 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1405 return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1409 //===----------------------------------------------------------------------===//
1410 // Vector related lowering:
1411 //===----------------------------------------------------------------------===//
// Return the single ConstantSDNode that every non-undef element of this
// BUILD_VECTOR shares, or (per the missing tail of this gapped listing,
// orig. 1423-1432) 0 when elements differ or no constant splat exists.
1413 static ConstantSDNode *
1414 getVecImm(SDNode *N) {
1415 SDValue OpVal(0, 0);
1417 // Check to see if this buildvec has a single non-undef value in its elements.
1418 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1419 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1420 if (OpVal.getNode() == 0)
1421 OpVal = N->getOperand(i);
1422 else if (OpVal != N->getOperand(i))
1426 if (OpVal.getNode() != 0) {
1427 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1435 /// get_vec_i18imm - Test if this vector is a vector filled with the same value
1436 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// If N is a splat whose value fits an unsigned 18-bit immediate, return it as
// a target constant.  For i64, the upper/lower 32-bit halves must match (the
// comparison line, orig. 1446-1447, is missing from this gapped listing)
// before the value is narrowed to one half.
1438 SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1440 if (ConstantSDNode *CN = getVecImm(N)) {
1441 uint64_t Value = CN->getZExtValue();
1442 if (ValueType == MVT::i64) {
1443 uint64_t UValue = CN->getZExtValue();
1444 uint32_t upper = uint32_t(UValue >> 32);
1445 uint32_t lower = uint32_t(UValue);
1448 Value = Value >> 32;
1450 if (Value <= 0x3ffff)
1451 return DAG.getTargetConstant(Value, ValueType);
1457 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1458 /// and the value fits into a signed 16-bit constant, and if so, return the
// If N is a splat whose value fits a signed 16-bit immediate, return it as a
// target constant.  Same i64 upper==lower precheck as get_vec_u18imm (the
// comparison line is missing from this gapped listing).
1460 SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1462 if (ConstantSDNode *CN = getVecImm(N)) {
1463 int64_t Value = CN->getSExtValue();
1464 if (ValueType == MVT::i64) {
1465 uint64_t UValue = CN->getZExtValue();
1466 uint32_t upper = uint32_t(UValue >> 32);
1467 uint32_t lower = uint32_t(UValue);
1470 Value = Value >> 32;
1472 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1473 return DAG.getTargetConstant(Value, ValueType);
1480 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1481 /// and the value fits into a signed 10-bit constant, and if so, return the
// If N is a splat whose value fits a signed 10-bit immediate (isInt<10>),
// return it as a target constant.  Same i64 upper==lower precheck as the
// siblings above (comparison line missing from this gapped listing).
1483 SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1485 if (ConstantSDNode *CN = getVecImm(N)) {
1486 int64_t Value = CN->getSExtValue();
1487 if (ValueType == MVT::i64) {
1488 uint64_t UValue = CN->getZExtValue();
1489 uint32_t upper = uint32_t(UValue >> 32);
1490 uint32_t lower = uint32_t(UValue);
1493 Value = Value >> 32;
1495 if (isInt<10>(Value))
1496 return DAG.getTargetConstant(Value, ValueType);
1502 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1503 /// and the value fits into a signed 8-bit constant, and if so, return the
1506 /// @note: The incoming vector is v16i8 because that's the only way we can load
1507 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// If N is a splat representable in 8 bits, return it as a target constant.
// For i16, the high and low bytes must agree (the check compares a shifted
// short against a masked short — NOTE(review): '>> 8' vs '& 0xff' looks
// asymmetric; preserved as-is from the original source).
1509 SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1511 if (ConstantSDNode *CN = getVecImm(N)) {
1512 int Value = (int) CN->getZExtValue();
1513 if (ValueType == MVT::i16
1514 && Value <= 0xffff /* truncated from uint64_t */
1515 && ((short) Value >> 8) == ((short) Value & 0xff))
1516 return DAG.getTargetConstant(Value & 0xff, ValueType);
1517 else if (ValueType == MVT::i8
1518 && (Value & 0xff) == Value)
1519 return DAG.getTargetConstant(Value, ValueType);
1525 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1526 /// and the value fits into a signed 16-bit constant, and if so, return the
// If N is a splat whose value has only its bits 16-31 set (i.e. suitable for
// the ILHU "immediate load halfword upper" form), return the upper halfword
// (Value >> 16) as a target constant.
1528 SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1530 if (ConstantSDNode *CN = getVecImm(N)) {
1531 uint64_t Value = CN->getZExtValue();
1532 if ((ValueType == MVT::i32
1533 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1534 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1535 return DAG.getTargetConstant(Value >> 16, ValueType);
1541 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
/// get_v4i32_imm - Catch-all: return the 32-bit splat constant of N, if any.
1542 SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1543 if (ConstantSDNode *CN = getVecImm(N)) {
1544 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1550 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
/// get_v2i64_imm - Catch-all: return the splat constant of N as an i64.
// NOTE(review): the value is cast through (unsigned), truncating to 32 bits
// before widening to i64 — looks suspicious for genuine 64-bit splats;
// preserved as-is from the original source.
1551 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1552 if (ConstantSDNode *CN = getVecImm(N)) {
1553 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1559 //! Lower a BUILD_VECTOR instruction creatively:
// Lower a BUILD_VECTOR: if the node is a constant splat (per
// BuildVectorSDNode::isConstantSplat, splat width at least 16 bits), emit the
// cheapest per-type materialization — integer-bitcast tricks for FP vectors,
// widened i8->i16 splats, direct BUILD_VECTORs for i16/i32/i64, and a helper
// for v2i64.  Non-splats return SDValue() so default expansion runs.
// NOTE(review): gapped listing — the "static SDValue" line, "case MVT::..."
// labels, "HasAnyUndefs" declaration, and closing braces are not visible.
1561 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1562 EVT VT = Op.getValueType();
1563 EVT EltVT = VT.getVectorElementType();
1564 DebugLoc dl = Op.getDebugLoc();
1565 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1566 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1567 unsigned minSplatBits = EltVT.getSizeInBits();
// SPU immediates are at least halfword-wide, so never ask for a narrower splat.
1569 if (minSplatBits < 16)
1572 APInt APSplatBits, APSplatUndef;
1573 unsigned SplatBitSize;
1576 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1577 HasAnyUndefs, minSplatBits)
1578 || minSplatBits < SplatBitSize)
1579 return SDValue(); // Wasn't a constant vector or splat exceeded min
1581 uint64_t SplatBits = APSplatBits.getZExtValue();
1583 switch (VT.getSimpleVT().SimpleTy) {
1586 raw_string_ostream Msg(msg);
1587 Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1588 << VT.getEVTString();
1589 llvm_report_error(Msg.str());
// v4f32: splat the raw 32-bit pattern as integers, then bitcast back to FP.
1593 uint32_t Value32 = uint32_t(SplatBits);
1594 assert(SplatBitSize == 32
1595 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1596 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1597 SDValue T = DAG.getConstant(Value32, MVT::i32);
1598 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1599 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
// v2f64: same raw-bits trick at 64-bit width.
1603 uint64_t f64val = uint64_t(SplatBits);
1604 assert(SplatBitSize == 64
1605 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1606 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1607 SDValue T = DAG.getConstant(f64val, MVT::i64);
1608 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1609 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1613 // 8-bit constants have to be expanded to 16-bits
1614 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1615 SmallVector<SDValue, 8> Ops;
1617 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1618 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1619 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1622 unsigned short Value16 = SplatBits;
1623 SDValue T = DAG.getConstant(Value16, EltVT);
1624 SmallVector<SDValue, 8> Ops;
1627 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1630 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1631 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1634 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1635 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
// v2i64 splats need the shuffle-mask machinery below.
1638 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
// Lower a v2i64 splat of SplatVal.  Fast paths: identical 32-bit halves
// become a v4i32 splat + bitcast; halves that are both "special" patterns
// (0, all-ones, sign bit) become a plain BUILD_VECTOR (constant-pool load).
// Otherwise, materialize the non-special halves as v4i32 splats and SHUFB
// them together with a shuffle mask built byte-by-byte below, where 0x80
// selects zero, 0xc0 all-ones, 0xe0 the sign-bit byte.
// NOTE(review): gapped listing — the "SDValue" return-type line, the HI32/
// LO32 declarations (orig. 1676-1678), the "if (upper_special) HI32 = LO32;"
// style fixups (orig. ~1700-1704), and several assignment lines inside the
// mask loop are not visible here.
1648 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1650 uint32_t upper = uint32_t(SplatVal >> 32);
1651 uint32_t lower = uint32_t(SplatVal);
1653 if (upper == lower) {
1654 // Magic constant that can be matched by IL, ILA, et. al.
1655 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1656 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1657 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1658 Val, Val, Val, Val));
1660 bool upper_special, lower_special;
1662 // NOTE: This code creates common-case shuffle masks that can be easily
1663 // detected as common expressions. It is not attempting to create highly
1664 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1666 // Detect if the upper or lower half is a special shuffle mask pattern:
1667 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1668 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1670 // Both upper and lower are special, lower to a constant pool load:
1671 if (lower_special && upper_special) {
1672 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1673 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1674 SplatValCN, SplatValCN);
1679 SmallVector<SDValue, 16> ShufBytes;
1682 // Create lower vector if not a special pattern
1683 if (!lower_special) {
1684 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1685 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1686 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1687 LO32C, LO32C, LO32C, LO32C));
1690 // Create upper vector if not a special pattern
1691 if (!upper_special) {
1692 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1693 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1694 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1695 HI32C, HI32C, HI32C, HI32C));
1698 // If either upper or lower are special, then the two input operands are
1699 // the same (basically, one of them is a "don't care")
// Build the 16-byte SHUFB control word, one 32-bit lane at a time.
1705 for (int i = 0; i < 4; ++i) {
1707 for (int j = 0; j < 4; ++j) {
1709 bool process_upper, process_lower;
1711 process_upper = (upper_special && (i & 1) == 0);
1712 process_lower = (lower_special && (i & 1) == 1);
1714 if (process_upper || process_lower) {
1715 if ((process_upper && upper == 0)
1716 || (process_lower && lower == 0))
1718 else if ((process_upper && upper == 0xffffffff)
1719 || (process_lower && lower == 0xffffffff))
1721 else if ((process_upper && upper == 0x80000000)
1722 || (process_lower && lower == 0x80000000))
1723 val |= (j == 0 ? 0xe0 : 0x80);
1725 val |= i * 4 + j + ((i & 1) * 16);
1728 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1731 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1732 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1733 &ShufBytes[0], ShufBytes.size()));
1737 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1738 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1739 /// permutation vector, V3, is monotonically increasing with one "exception"
1740 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1741 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1742 /// In either case, the net result is going to eventually invoke SHUFB to
1743 /// permute/shuffle the bytes from V1 and V2.
1745 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1746 /// control word for byte/halfword/word insertion. This takes care of a single
1747 /// element move from V2 into V1.
1749 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// Lower a VECTOR_SHUFFLE to Cell operations.  Three strategies, chosen by
// scanning the mask: (1) exactly one element inserted from V2 into an
// otherwise monotonic V1 -> SHUFFLE_MASK (C*D insert-control) + SHUFB;
// (2) the mask is a pure rotation of V1 -> ROTBYTES_LEFT; (3) anything else
// -> build an explicit v16i8 byte-permute mask and SHUFB it directly.
// NOTE(review): gapped listing — declarations for V2Elt/V0Elt/rotate, the
// per-EltVT V2EltIdx0 assignments, and several loop-interior lines (e.g.
// "monotonic = false;", "PrevElt = SrcElt;") are not visible here.
1750 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1751 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1752 SDValue V1 = Op.getOperand(0);
1753 SDValue V2 = Op.getOperand(1);
1754 DebugLoc dl = Op.getDebugLoc();
1756 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1758 // If we have a single element being moved from V1 to V2, this can be handled
1759 // using the C*[DX] compute mask instructions, but the vector elements have
1760 // to be monotonically increasing with one exception element.
1761 EVT VecVT = V1.getValueType();
1762 EVT EltVT = VecVT.getVectorElementType();
1763 unsigned EltsFromV2 = 0;
1765 unsigned V2EltIdx0 = 0;
1766 unsigned CurrElt = 0;
1767 unsigned MaxElts = VecVT.getVectorNumElements();
1768 unsigned PrevElt = 0;
1770 bool monotonic = true;
// V2EltIdx0 = first mask index that refers to V2 (the assignments per element
// type are missing from this listing; only the type dispatch survives).
1773 if (EltVT == MVT::i8) {
1775 } else if (EltVT == MVT::i16) {
1777 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1779 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1782 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1784 for (unsigned i = 0; i != MaxElts; ++i) {
1785 if (SVN->getMaskElt(i) < 0)
1788 unsigned SrcElt = SVN->getMaskElt(i);
1791 if (SrcElt >= V2EltIdx0) {
// Count elements taken from V2; remember the byte offset of the first one.
1792 if (1 >= (++EltsFromV2)) {
1793 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1795 } else if (CurrElt != SrcElt) {
// Rotation detection: consecutive (or wrapping) source indices.
1803 if (PrevElt > 0 && SrcElt < MaxElts) {
1804 if ((PrevElt == SrcElt - 1)
1805 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1812 } else if (PrevElt == 0) {
1813 // First time through, need to keep track of previous element
1816 // This isn't a rotation, takes elements from vector 2
1822 if (EltsFromV2 == 1 && monotonic) {
1823 // Compute mask and shuffle
1824 MachineFunction &MF = DAG.getMachineFunction();
1825 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1826 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1827 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1828 // Initialize temporary register to 0
1829 SDValue InitTempReg =
1830 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1831 // Copy register's contents as index in SHUFFLE_MASK:
1832 SDValue ShufMaskOp =
1833 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1834 DAG.getTargetConstant(V2Elt, MVT::i32),
1835 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1836 // Use shuffle mask in SHUFB synthetic instruction:
1837 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1839 } else if (rotate) {
1840 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1842 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1843 V1, DAG.getConstant(rotamt, MVT::i16));
1845 // Convert the SHUFFLE_VECTOR mask's input element units to the
1847 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1849 SmallVector<SDValue, 16> ResultMask;
1850 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
// Undef mask slots default to source element 0.
1851 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1853 for (unsigned j = 0; j < BytesPerElement; ++j)
1854 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1857 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1858 &ResultMask[0], ResultMask.size());
1859 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
// Lower SCALAR_TO_VECTOR: a constant scalar becomes an all-lanes-equal
// BUILD_VECTOR (which later simplifies to a vector register load); any other
// scalar is broadcast with the PREFSLOT2VEC synthetic node.
// NOTE(review): gapped listing — the declarations of n_copies/VT (orig.
// ~1873-1875) and the "case MVT::..." labels of the second switch are not
// visible here.
1863 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1864 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1865 DebugLoc dl = Op.getDebugLoc();
1867 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1868 // For a constant, build the appropriate constant vector, which will
1869 // eventually simplify to a vector register load.
1871 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1872 SmallVector<SDValue, 16> ConstVecValues;
1876 // Create a constant vector:
1877 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1878 default: llvm_unreachable("Unexpected constant value type in "
1879 "LowerSCALAR_TO_VECTOR");
1880 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1881 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1882 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1883 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1884 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1885 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1888 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1889 for (size_t j = 0; j < n_copies; ++j)
1890 ConstVecValues.push_back(CValue);
1892 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1893 &ConstVecValues[0], ConstVecValues.size());
1895 // Otherwise, copy the value from one register to another:
1896 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1897 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1904 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1911 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1912 EVT VT = Op.getValueType();
1913 SDValue N = Op.getOperand(0);
1914 SDValue Elt = Op.getOperand(1);
1915 DebugLoc dl = Op.getDebugLoc();
1918 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1919 // Constant argument:
1920 int EltNo = (int) C->getZExtValue();
1923 if (VT == MVT::i8 && EltNo >= 16)
1924 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1925 else if (VT == MVT::i16 && EltNo >= 8)
1926 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1927 else if (VT == MVT::i32 && EltNo >= 4)
1928 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1929 else if (VT == MVT::i64 && EltNo >= 2)
1930 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1932 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1933 // i32 and i64: Element 0 is the preferred slot
1934 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1937 // Need to generate shuffle mask and extract:
1938 int prefslot_begin = -1, prefslot_end = -1;
1939 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1941 switch (VT.getSimpleVT().SimpleTy) {
1943 assert(false && "Invalid value type!");
1945 prefslot_begin = prefslot_end = 3;
1949 prefslot_begin = 2; prefslot_end = 3;
1954 prefslot_begin = 0; prefslot_end = 3;
1959 prefslot_begin = 0; prefslot_end = 7;
1964 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1965 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1967 unsigned int ShufBytes[16] = {
1968 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1970 for (int i = 0; i < 16; ++i) {
1971 // zero fill uppper part of preferred slot, don't care about the
1973 unsigned int mask_val;
1974 if (i <= prefslot_end) {
1976 ((i < prefslot_begin)
1978 : elt_byte + (i - prefslot_begin));
1980 ShufBytes[i] = mask_val;
1982 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1985 SDValue ShufMask[4];
1986 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1987 unsigned bidx = i * 4;
1988 unsigned int bits = ((ShufBytes[bidx] << 24) |
1989 (ShufBytes[bidx+1] << 16) |
1990 (ShufBytes[bidx+2] << 8) |
1992 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1995 SDValue ShufMaskVec =
1996 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1997 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1999 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2000 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2001 N, N, ShufMaskVec));
2003 // Variable index: Rotate the requested element into slot 0, then replicate
2004 // slot 0 across the vector
2005 EVT VecVT = N.getValueType();
2006 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2007 llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2011 // Make life easier by making sure the index is zero-extended to i32
2012 if (Elt.getValueType() != MVT::i32)
2013 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2015 // Scale the index to a bit/byte shift quantity
2017 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2018 unsigned scaleShift = scaleFactor.logBase2();
2021 if (scaleShift > 0) {
2022 // Scale the shift factor:
2023 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2024 DAG.getConstant(scaleShift, MVT::i32));
2027 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2029 // Replicate the bytes starting at byte 0 across the entire vector (for
2030 // consistency with the notion of a unified register set)
2033 switch (VT.getSimpleVT().SimpleTy) {
2035 llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2039 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2040 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2041 factor, factor, factor, factor);
2045 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2046 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2047 factor, factor, factor, factor);
2052 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2053 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2054 factor, factor, factor, factor);
2059 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2060 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2061 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2062 loFactor, hiFactor, loFactor, hiFactor);
2067 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2068 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2069 vecShift, vecShift, replicate));
//! Lower ISD::INSERT_VECTOR_ELT (constant index only) via SHUFFLE_MASK+SHUFB.
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  // Only a constant insertion index is handled here; a non-constant index
  // fails the cast below.
  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  // SHUFFLE_MASK materializes the insertion-control mask for the given slot.
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);

  // Merge the scalar (broadcast via SCALAR_TO_VECTOR) into the vector under
  // control of the mask.
      DAG.getNode(SPUISD::SHUFB, dl, VT,
                  DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
//! Lower i8 arithmetic by widening to i16.
//
// Each case extends the operands to i16 (sign- or zero-extend depending on
// the opcode), performs the operation at i16, and truncates back to i8.
// Shift amounts are additionally coerced to the target's shift-amount type.
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  EVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
    llvm_unreachable("Unhandled i8 math operator");
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));

    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));

    // Rotate-style case: zero-extend the value, coerce the amount to ShiftVT.
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);

    // Replicate lower 8-bits into upper 8:
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));

    // Logical-shift case: zero-extend the value.
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));

    // Arithmetic-shift case: sign-extend the value.
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));

    // Multiply case: sign-extend both operands.
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
//! Lower byte immediate operations for v16i8 vectors:
// Looks for a BUILD_VECTOR splat operand (possibly behind a BIT_CONVERT, on
// either side of the binary op) and, if it splats a byte constant, rebuilds
// the op with a 16 x i8 target-constant vector so it can match ANDBI/ORBI/
// XORBI-style instruction forms.
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
      // Constant wasn't operand 0; try the other operand ordering.
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");

    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
      uint64_t SplatBits = APSplatBits.getZExtValue();
      // Replicate the low byte of the splat as a 16-element i8 constant.
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);

      SmallVector<SDValue, 16> tcVec;
      tcVec.assign(16, tc);
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered.  Return the operation, rather than a null SDValue.
//! Custom lowering for CTPOP (count population)
 Custom lowering code that counts the number of ones in the input
 operand. SPU has such an instruction, but it counts the number of
 ones per byte, which then have to be accumulated.
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // Vector type with the same 128-bit register width as VT's element count.
  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  switch (VT.getSimpleVT().SimpleTy) {
    assert(false && "Invalid value type!");
    // i8: a single CNTB byte-count is already the answer.
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);

    // i16: count bytes, then fold the two byte-counts together and mask.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,

    // i32: count bytes, then two shift+add reduction steps and a final mask.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),

      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),

      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2365 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2367 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2368 All conversions to i64 are expanded to a libcall.
2370 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2371 SPUTargetLowering &TLI) {
2372 EVT OpVT = Op.getValueType();
2373 SDValue Op0 = Op.getOperand(0);
2374 EVT Op0VT = Op0.getValueType();
2376 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2377 || OpVT == MVT::i64) {
2378 // Convert f32 / f64 to i32 / i64 via libcall.
2380 (Op.getOpcode() == ISD::FP_TO_SINT)
2381 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2382 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2383 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2385 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2391 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2393 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2394 All conversions from i64 are expanded to a libcall.
2396 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2397 SPUTargetLowering &TLI) {
2398 EVT OpVT = Op.getValueType();
2399 SDValue Op0 = Op.getOperand(0);
2400 EVT Op0VT = Op0.getValueType();
2402 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2403 || Op0VT == MVT::i64) {
2404 // Convert i32, i64 to f64 via libcall:
2406 (Op.getOpcode() == ISD::SINT_TO_FP)
2407 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2408 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2409 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2411 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2417 //! Lower ISD::SETCC
2419 This handles MVT::f64 (double floating point) condition lowering
2421 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2422 const TargetLowering &TLI) {
2423 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2424 DebugLoc dl = Op.getDebugLoc();
2425 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2427 SDValue lhs = Op.getOperand(0);
2428 SDValue rhs = Op.getOperand(1);
2429 EVT lhsVT = lhs.getValueType();
2430 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2432 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2433 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2434 EVT IntVT(MVT::i64);
2436 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2437 // selected to a NOP:
2438 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2440 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2441 DAG.getNode(ISD::SRL, dl, IntVT,
2442 i64lhs, DAG.getConstant(32, MVT::i32)));
2443 SDValue lhsHi32abs =
2444 DAG.getNode(ISD::AND, dl, MVT::i32,
2445 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2447 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2449 // SETO and SETUO only use the lhs operand:
2450 if (CC->get() == ISD::SETO) {
2451 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2453 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2454 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2455 DAG.getSetCC(dl, ccResultVT,
2456 lhs, DAG.getConstantFP(0.0, lhsVT),
2458 DAG.getConstant(ccResultAllOnes, ccResultVT));
2459 } else if (CC->get() == ISD::SETUO) {
2460 // Evaluates to true if Op0 is [SQ]NaN
2461 return DAG.getNode(ISD::AND, dl, ccResultVT,
2462 DAG.getSetCC(dl, ccResultVT,
2464 DAG.getConstant(0x7ff00000, MVT::i32),
2466 DAG.getSetCC(dl, ccResultVT,
2468 DAG.getConstant(0, MVT::i32),
2472 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2474 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2475 DAG.getNode(ISD::SRL, dl, IntVT,
2476 i64rhs, DAG.getConstant(32, MVT::i32)));
2478 // If a value is negative, subtract from the sign magnitude constant:
2479 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2481 // Convert the sign-magnitude representation into 2's complement:
2482 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2483 lhsHi32, DAG.getConstant(31, MVT::i32));
2484 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2486 DAG.getNode(ISD::SELECT, dl, IntVT,
2487 lhsSelectMask, lhsSignMag2TC, i64lhs);
2489 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2490 rhsHi32, DAG.getConstant(31, MVT::i32));
2491 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2493 DAG.getNode(ISD::SELECT, dl, IntVT,
2494 rhsSelectMask, rhsSignMag2TC, i64rhs);
2498 switch (CC->get()) {
2501 compareOp = ISD::SETEQ; break;
2504 compareOp = ISD::SETGT; break;
2507 compareOp = ISD::SETGE; break;
2510 compareOp = ISD::SETLT; break;
2513 compareOp = ISD::SETLE; break;
2516 compareOp = ISD::SETNE; break;
2518 llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2522 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2523 (ISD::CondCode) compareOp);
2525 if ((CC->get() & 0x8) == 0) {
2526 // Ordered comparison:
2527 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2528 lhs, DAG.getConstantFP(0.0, MVT::f64),
2530 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2531 rhs, DAG.getConstantFP(0.0, MVT::f64),
2533 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2535 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
//! Lower ISD::SELECT_CC
 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
 \note Need to revisit this in the future: if the code path through the true
 and false value computations is longer than the latency of a branch (6
 cycles), then it would be more advantageous to branch and insert a new basic
 block and branch on the condition. However, this code does not make that
 assumption, given the simplistic uses so far.
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  EVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
//! Custom lower ISD::TRUNCATE
// Only the i128 -> i64 case is custom-lowered here, using a byte shuffle that
// selects the least-significant doubleword; all other truncates are returned
// unchanged for default legalization.
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
  // Type to truncate to
  EVT VT = Op.getValueType();
  MVT simpleVT = VT.getSimpleVT();
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  // Type to truncate from
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       Op0, Op0, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);

  return SDValue(); // Leave the truncate unmolested
 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
 * algorithm is to duplicate the sign bit using rotmai to generate at
 * least one byte full of sign bits. Then propagate the "sign-byte" into
 * the leftmost words and the i64/i32 into the rightmost words using shufb.
 * @param Op The sext operand
 * @param DAG The current DAG
 * @return The SDValue with the entire instruction sequence
static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
  DebugLoc dl = Op.getDebugLoc();

  // Type to extend to
  MVT OpVT = Op.getValueType().getSimpleVT();

  // Type to extend from
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType().getSimpleVT();

  // The type to extend to needs to be a i128 and
  // the type to extend from needs to be i64 or i32.
  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
         "LowerSIGN_EXTEND: input and/or output operand have wrong size");

  // Create shuffle mask
  unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 DAG.getConstant(mask1, MVT::i32),
                                 DAG.getConstant(mask1, MVT::i32),
                                 DAG.getConstant(mask2, MVT::i32),
                                 DAG.getConstant(mask3, MVT::i32));

  // Word wise arithmetic right shift to generate at least one byte
  // that contains sign bits.
  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
  SDValue sraVal = DAG.getNode(ISD::SRA,
                               DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
                               DAG.getConstant(31, MVT::i32));

  // Shuffle bytes - Copy the sign bits into the upper 64 bits
  // and the input value into the lower 64 bits.
  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);

  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
//! Custom (target-specific) lowering entry point
 This is where LLVM's DAG selection process calls to do target-specific
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
  unsigned Opc = (unsigned) Op.getOpcode();
  EVT VT = Op.getValueType();

    // Unhandled opcode: dump diagnostics and abort.
    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    Op.getNode()->dump();

    llvm_unreachable(0);

    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);

  // i8, i64 math ops:
    return LowerI8Math(Op, DAG, Opc, *this);

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
    return LowerI8Math(Op, DAG, Opc, *this);

    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

    return LowerSETCC(Op, DAG, *this);

    return LowerTRUNCATE(Op, DAG);

  case ISD::SIGN_EXTEND:
    return LowerSIGN_EXTEND(Op, DAG);
//! Replace results of illegal-typed nodes; currently only diagnoses
//! unexpected opcodes and otherwise leaves the node unchanged.
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
  unsigned Opc = (unsigned) N->getOpcode();
  EVT OpVT = N->getValueType(0);

    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";

  /* Otherwise, return unchanged */
2790 //===----------------------------------------------------------------------===//
2791 // Target Optimization Hooks
2792 //===----------------------------------------------------------------------===//
//! Target-specific DAG combines: folds around SPUISD::IndirectAddr,
//! extend-of-prefslot, degenerate vector shifts, and PREFSLOT2VEC chains.
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
  TargetMachine &TM = getTargetMachine();
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0); // everything has at least one operand
  EVT NodeVT = N->getValueType(0); // The node's value type
  EVT Op0VT = Op0.getValueType(); // The first operand's result
  SDValue Result; // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
                << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                << "With: (SPUindirect <arg>, <arg>)\n";

        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
                << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                << "), " << CN0->getSExtValue() << ")\n"
                << "With: (SPUindirect <arg>, "
                << combinedConst << ")\n";

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);

  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        errs() << "\nWith: ";
        Op0.getNode()->dump(&DAG);

  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith: ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
                << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                << "With: (SPUindirect <arg>, <arg>)\n";

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));

  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {

  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {

    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {

    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      Result = Op0.getOperand(0);

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith: ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
2979 //===----------------------------------------------------------------------===//
2980 // Inline Assembly Support
2981 //===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
// Single-letter constraints handled in the switch map to register classes;
// everything else defers to the TargetLowering base implementation.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
      return C_RegisterClass;
  return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm constraint (and operand type) to a concrete
/// SPU register class; falls back to the base-class handling otherwise.
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
      return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
      return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      return std::make_pair(0U, SPU::GPRCRegisterClass);

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
//! Compute used/known bits for a SPU operand
// Currently conservative: the listed SPU node kinds fall through without
// refining KnownZero/KnownOne.
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
//! Number of known sign bits for SPU-specific nodes; for the handled opcode a
//! small integer result type yields a full-width sign-bit count.
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
    EVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
    return VT.getSizeInBits();
// LowerAsmOperandForConstraint
// No SPU-specific inline-asm operand handling yet; delegate entirely to the
// TargetLowering base class.
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  // i.e. V must lie strictly between -(2^18) and (2^18)-1.
  return (V > -(1 << 18) && V < (1 << 18) - 1);
// Overload for symbolic (GlobalValue) address immediates.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3103 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3104 // The SPU target isn't yet aware of offsets.