//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
// Used in getTargetNodeName() below
static std::map<unsigned, const char *> node_names;

// Byte offset of the preferred slot (counted from the MSB)
static int prefslotOffset(EVT VT) {
  int retval = 0;
  if (VT == MVT::i1) retval = 3;
  if (VT == MVT::i8) retval = 3;
  if (VT == MVT::i16) retval = 2;

  return retval;
}
//! Expand a library call into an actual call DAG node
/*!
 This code is taken from SelectionDAGLegalize, since it is not exposed as
 part of the LLVM SelectionDAG API.
 */
static SDValue
ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
              bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
  // The input chain to this libcall is the entry node of the function.
  // Legalizing the call will automatically add the previous call to the
  // dependence.
  SDValue InChain = DAG.getEntryNode();
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
    EVT ArgVT = Op.getOperand(i).getValueType();
    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
    Entry.Node = Op.getOperand(i);
    Entry.Ty = ArgTy;
    Entry.isSExt = isSigned;
    Entry.isZExt = !isSigned;
    Args.push_back(Entry);
  }
  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                         TLI.getPointerTy());
  // Splice the libcall in wherever FindInputOutputChains tells us to.
  const Type *RetTy =
    Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
  std::pair<SDValue, SDValue> CallInfo =
    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                    0, TLI.getLibcallCallingConv(LC), false,
                    /*isReturnValueUsed=*/true,
                    Callee, Args, DAG, Op.getDebugLoc());

  return CallInfo.first;
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);
  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
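  // Note: the SPU has a single unified register file of 128 x 128-bit
  // registers; the classes above are differently-typed views onto the same
  // physical registers.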
  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
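  // With Expand, the legalizer turns these into library calls (an i32 SDIV,
  // for example, typically becomes a call to __divsi3), since the SPU ISA
  // provides no integer divide hardware.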
  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported
  // in the backend.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal and
  // selected directly:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);
  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);
  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);
  setOperationAction(ISD::CTTZ, MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ, MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i128, Expand);

  setOperationAction(ISD::CTLZ, MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ, MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ, MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ, MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ, MVT::i128, Expand);
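  // Promote lets i8/i16 CTLZ reuse the native i32 count-leading-zeros
  // instruction: the operand is widened to i32 and the count is adjusted
  // for the narrower type.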
  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);
  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
  // f64 FDIV on SPU is expanded to a libcall (the __fast_divdf3 name is set
  // above):
  setOperationAction(ISD::FDIV, MVT::f64, Expand);
  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BITCAST, MVT::i32, Legal);
  setOperationAction(ISD::BITCAST, MVT::f32, Legal);
  setOperationAction(ISD::BITCAST, MVT::i64, Legal);
  setOperationAction(ISD::BITCAST, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is marked legal here as well and selected via patterns.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Custom);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  // SPU compares produce all-ones/all-zeros results, so booleans are 0/-1:
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;  // 2^3 = 8-byte alignment
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  MVT::SimpleValueType retval;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i1:
  case MVT::i8:
    retval = MVT::i8; break;
  case MVT::i16:
    retval = MVT::i16; break;
  case MVT::i32:
  default:
    retval = MVT::i32;
  }
  return retval;
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32     = vec2prefslot %3
%5  f64     = fp_extend %4
\endverbatim
*/
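// Illustrative example of the rotation (not from the original source): an
// aligned i32 load from byte offset 4 of a quadword has pso == 0, so
// rotamt = (4 & 0xf) - 0 = 4, and rotating the quadword left by 4 bytes
// moves bytes 4-7 into the preferred slot (bytes 0-3).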
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  int pso = prefslotOffset(InVT);
  DebugLoc dl = Op.getDebugLoc();
  EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
                                                  (128 / InVT.getSizeInBits()));

  assert(LN->getAddressingMode() == ISD::UNINDEXED
         && "we should get only UNINDEXED addresses");
  // Cleanly aligned 128-bit loads can be selected as-is:
  if (InVT.getSizeInBits() == 128 && alignment == 16)
    return SDValue();
  // Get pointerinfos to the memory chunk(s) that contain the data to load
  uint64_t mpi_offset = LN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);

  SDValue result;
  SDValue basePtr = LN->getBasePtr();
  SDValue rotate;

  if (alignment == 16) {
    ConstantSDNode *CN;

    // Special cases for a known aligned load to simplify the base pointer
    // and the rotation amount:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();
      int64_t rotamt = int64_t((offset & 0xf) - pso);

      if (rotamt < 0)
        rotamt += 16;

      rotate = DAG.getConstant(rotamt, MVT::i16);

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
               || (basePtr.getOpcode() == SPUISD::IndirectAddr
                   && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                   && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
      // Plain aligned a-form address: rotate into preferred slot
      // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getConstant(rotamt, MVT::i16);
    } else {
      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      int64_t rotamt = -pso;
      if (rotamt < 0)
        rotamt += 16;
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(rotamt, PtrVT));
    }
  } else {
    // Unaligned load: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Offset the rotate amount by the basePtr and the preferred slot
    // byte offset
    rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                         basePtr,
                         DAG.getConstant(-pso, PtrVT));
  }
  // Do the load as a i128 to allow possible shifting
  SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
                            lowMemPtr,
                            LN->isVolatile(), LN->isNonTemporal(), 16);

  // When the size is not greater than alignment we get all data with just
  // one load
  if (alignment >= InVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
                         low.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BITCAST, dl, vecVT, result));
  }
  // When alignment is less than the size, we might need (known only at
  // run-time) two loads.
  // TODO: if the memory address is composed only from constants, we have
  // extra knowledge, and might avoid the second load
  else {
    // storage position offset from lower 16 byte aligned memory chunk
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                       DAG.getConstant( 16, MVT::i32),
                                       offset );
    // get a register full of ones. (this implementation is a workaround:
    // LLVM cannot handle 128 bit signed int constants)
    SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
                               DAG.getNode(ISD::ADD, dl, PtrVT,
                                           basePtr,
                                           DAG.getConstant(16, PtrVT)),
                               highMemPtr,
                               LN->isVolatile(), LN->isNonTemporal(), 16);
    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            high.getValue(1));

    // Shift the (possible) high part right to compensate the misalignment.
    // If there is no high part (i.e. value is i64 and offset is 4), this
    // will zero out the high value.
    high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
                       DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant( 16, MVT::i32),
                                   offset));

    // Shift the low part similarly.
    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );

    // Merge the two parts
    result = DAG.getNode(ISD::BITCAST, dl, vecVT,
                         DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
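    // Illustrative example: an i64 at offset 12 has offset = 12 and
    // offset_compl = 4. SHL_BYTES(low, 12) brings bytes 12-15 of the low
    // quadword to bytes 0-3, SRL_BYTES(high, 4) brings bytes 0-3 of the
    // high quadword to bytes 4-7, and the OR reassembles all 8 bytes at
    // the top of the result.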
    if (!InVT.isVector()) {
      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
    }
  }

  // Handle extending loads by extending the scalar result:
  if (ExtType == ISD::SEXTLOAD) {
    result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::ZEXTLOAD) {
    result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
  } else if (ExtType == ISD::EXTLOAD) {
    unsigned NewOpc = ISD::ANY_EXTEND;

    if (OutVT.isFloatingPoint())
      NewOpc = ISD::FP_EXTEND;

    result = DAG.getNode(NewOpc, dl, OutVT, result);
  }

  SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
  SDValue retops[2] = {
    result,
    the_chain
  };

  result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                       retops, sizeof(retops) / sizeof(retops[0]));
  return result;
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
                                                  (128 / StVT.getSizeInBits()));
  // Get pointerinfos to the memory chunk(s) that contain the data to store
  uint64_t mpi_offset = SN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);

  SDValue result;

  assert(SN->getAddressingMode() == ISD::UNINDEXED
         && "we should get only UNINDEXED addresses");
  // Cleanly aligned 128-bit stores can be selected as-is:
  if (StVT.getSizeInBits() == 128 && alignment == 16)
    return SDValue();
  SDValue alignLoadVec;
  SDValue basePtr = SN->getBasePtr();
  SDValue the_chain = SN->getChain();
  SDValue insertEltOffs;

  if (alignment == 16) {
    ConstantSDNode *CN;
    // Special cases for a known aligned store to simplify the base pointer
    // and insertion byte:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & 0xf), PtrVT));

      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else {
      // Otherwise, assume it's at byte 0 of basePtr
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }
  } else {
    // Unaligned store: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Insertion point is solely determined by basePtr's contents
    insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
  }
  // Load the lower part of the memory to which to store.
  SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
                            lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
                            16);

  // If we don't need to store over the 16 byte boundary, one store suffices:
  if (alignment >= StVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);
    LoadSDNode *LN = cast<LoadSDNode>(low);
    SDValue theValue = SN->getValue();

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }
    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif
    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                      insertEltOffs);
    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                      theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, low,
                         DAG.getNode(ISD::BITCAST, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          lowMemPtr,
                          LN->isVolatile(), LN->isNonTemporal(),
                          16);

  }
  // Do the store when it might cross the 16 byte memory access boundary.
  else {
    // TODO issue a warning if SN->isVolatile()== true? This is likely not
    // what the user wanted.

    // address offset from nearest lower 16 byte aligned address
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                 SN->getBasePtr(),
                                 DAG.getConstant(0xf, MVT::i32));
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                       DAG.getConstant( 16, MVT::i32),
                                       offset);
    // sizeof(Value) - (16 - offset)
    SDValue hi_shift = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                   DAG.getConstant( VT.getSizeInBits()/8,
                                                    MVT::i32),
                                   offset_compl);
    // 16 - sizeof(Value)
    SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                  DAG.getConstant( 16, MVT::i32),
                                  DAG.getConstant( VT.getSizeInBits()/8,
                                                   MVT::i32));
    // get a register full of ones
    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
    // Create the 128 bit masks that have ones where the data to store is
    // located.
    SDValue lowmask, himask;
    // If the value to store doesn't fill an entire 128 bits, zero out the
    // last bits of the mask so that only the value we want to store is
    // masked. This happens e.g. for a store of i32 with alignment 2.
    if (!VT.isVector()) {
      Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
      lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
      lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                            surplus);
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
      Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
    } else {
      lowmask = ones;
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
    }

    // This will be zero if no data goes to the high quadword.
    himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                         offset_compl);
    lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
                          offset);
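    // Illustrative example: storing an i32 at offset 14 gives surplus = 12,
    // so the mask starts out as ones in bytes 0-3; after the shifts above,
    // lowmask has ones in bytes 14-15 of the low quadword and himask has
    // ones in bytes 0-1 of the high one, matching where the value's bytes
    // land in each chunk.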
    // Load in the old data and zero out the parts that will be overwritten
    // with the new data to store.
    SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
                             DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                         DAG.getConstant( 16, PtrVT)),
                             highMemPtr,
                             SN->isVolatile(), SN->isNonTemporal(), 16);
    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                            hi.getValue(1));
    low = DAG.getNode(ISD::AND, dl, MVT::i128,
                      DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
                      DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
    hi = DAG.getNode(ISD::AND, dl, MVT::i128,
                     DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
                     DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));

    // Shift the Value to store into place. rlow contains the parts that go to
    // the lower memory chunk, rhi has the parts that go to the upper one.
    SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
    rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
    SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
                              offset_compl);
    // Merge the old data and the new data and store the results
    // Need to convert vectors here to integer as 'OR'ing floats asserts
    rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
                       DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
    rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
                      DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));

    low = DAG.getStore(the_chain, dl, rlow, basePtr,
                       lowMemPtr,
                       SN->isVolatile(), SN->isNonTemporal(), 16);
    hi  = DAG.getStore(the_chain, dl, rhi,
                       DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                   DAG.getConstant( 16, PtrVT)),
                       highMemPtr,
                       SN->isVolatile(), SN->isNonTemporal(), 16);
    result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
                         hi.getValue(0));
  }

  return result;
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();
  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
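// A-form addresses rely on the SPU's 18-bit absolute addressing, which can
// reach the entire 256KB local store; "large memory" mode instead builds
// the address from a Hi/Lo pair feeding an indirect (x-form) access.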
//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
                       " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1139 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1140 EVT ObjectVT = Ins[ArgNo].VT;
1141 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1143 CCValAssign &VA = ArgLocs[ArgNo];
    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }
      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    //        tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of ArgRegs array (R3 through R79 inclusive)
    unsigned NumArgRegs = 77;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                 // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
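// Illustrative example: a branch target at local-store address 0x1234 is
// word aligned and survives the 18-bit sign-extension check, so it is
// encoded as the immediate 0x1234 >> 2 == 0x48d.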
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;
  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));
  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;
  // Now handle the return value(s)
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
                    RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);

  // If the call has results, copy the values out of the ret val registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                                     InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);
    InVals.push_back(Val);
  }

  return Chain;
}
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;
  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//
static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
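// For an i64 splat the 64-bit immediate is only usable when its upper and
// lower 32-bit halves are identical, since the underlying instructions
// replicate a 32-bit quantity; the same halves check appears in the i16
// and i10 variants below.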
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isInt<10>(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    // Keep all 64 bits; narrowing to unsigned here would silently drop the
    // upper word of the splat value.
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
                       Twine(VT.getEVTString()));
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BITCAST, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}

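// Example (LowerBUILD_VECTOR): a v4i32 splat of the constant 7 takes the
// MVT::v4i32 case above and becomes (BUILD_VECTOR 7, 7, 7, 7), which
// instruction selection can then match directly against the SPU
// immediate-load patterns (IL, ILA, et al.).
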
//! Lower a v2i64 constant splat that LowerBUILD_VECTOR could not handle
//! directly.
SDValue
SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BITCAST, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }

    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care"):
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;

    // Shuffle byte codes 0x80, 0xc0 and 0xe0 are shufb control patterns that
    // produce the constant bytes 0x00, 0xff and 0x80, respectively; any other
    // code selects a byte out of the concatenated HI32/LO32 operands.
    for (int i = 0; i < 4; ++i) {
      uint64_t val = 0;
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;
        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else
          val |= i * 4 + j + ((i & 1) * 16);
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }

    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
}

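// Worked example (LowerV2I64Splat): SplatVal = 0x0000000012345678ULL has a
// "special" upper word (0) and an ordinary lower word. LO32 becomes a splat
// of 0x12345678, HI32 aliases LO32 as a don't-care, and the generated mask
// zero-fills the even words (0x80 codes) while selecting LO32's bytes into
// the odd words, yielding <0x0000000012345678, 0x0000000012345678>.
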
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
///
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
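///
/// For example, the v4i32 mask (0, 1, 6, 3) is monotonic with a single
/// element taken from V2 (mask element 6 is V2's element 2) landing in the
/// slot it came from, so it lowers to a C*D-generated insertion mask; a mask
/// like (3, 2, 1, 0) does not qualify and falls back to an explicit byte
/// permutation vector for SHUFB.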
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element, and the source
  // slot of the element to move must be the same as the destination.
  EVT VecVT = V1.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2EltOffset = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;

  bool monotonic = true;
  bool rotate = true;
  int rotamt = 0;
  EVT maskVT;             // which of the c?d instructions to use

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
    maskVT = MVT::v16i8;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
    maskVT = MVT::v8i16;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
    maskVT = MVT::v4i32;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
    maskVT = MVT::v2i64;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        // TODO: optimize for the monotonic case when several consecutive
        // elements are taken from V2. Do we ever get such a case?
        if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
          V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
        else
          monotonic = false;
        ++EltsFromV2;
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
        } else {
          rotate = false;
        }
      } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
        // First time or after a "wrap around"
        rotamt = SrcElt - i;
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation; it takes elements from vector 2.
        rotate = false;
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

    // As SHUFFLE_MASK becomes a c?d instruction, feed it an address.
    // R1 ($sp) is used here only as it is guaranteed to have the last bits
    // zero.
    SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  DAG.getRegister(SPU::R1, PtrVT),
                                  DAG.getConstant(V2EltOffset, MVT::i32));
    SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
                                     maskVT, Pointer);

    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
    if (rotamt < 0)
      rotamt += MaxElts;
    rotamt *= EltVT.getSizeInBits()/8;
    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);

      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
    }
    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());

    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}

static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
  DebugLoc dl = Op.getDebugLoc();

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    EVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected constant value type in "
                              "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8;  break;
    case MVT::v8i16: n_copies = 8;  VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4;  VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4;  VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2;  VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2;  VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
    }
  }
}

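// Example (LowerSCALAR_TO_VECTOR): (scalar_to_vector (i32 5)) in a v4i32
// context becomes (BUILD_VECTOR 5, 5, 5, 5), which later simplifies to a
// vector register load; a non-constant scalar instead becomes a single
// PREFSLOT2VEC register-to-register move.
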
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // Sanity check the extraction slot:
    if (VT == MVT::i8 && EltNo >= 16)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    for (int i = 0; i < 16; ++i) {
      // Zero fill the upper part of the preferred slot, don't care about
      // the other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    EVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector()) {
      report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
                         " vector type!");
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      report_fatal_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector"
                         " type");
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}

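// Worked example (LowerEXTRACT_VECTOR_ELT, constant index): extracting
// element 1 of a v4i32 gives elt_byte = 4 and the shuffle byte pattern
// (4, 5, 6, 7) repeated across all 16 bytes, which selects the element's
// four bytes into the preferred slot (replicated across the register)
// before VEC2PREFSLOT extracts the scalar.
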
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT eltVT = ValOp.getValueType();

  // Use 0 when the lane to insert to is 'undef':
  int64_t Offset = 0;
  if (IdxOp.getOpcode() != ISD::UNDEF) {
    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
    Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
  }

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(Offset, PtrVT));
  // Widen the mask when dealing with half vectors:
  EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
                                128 / VT.getVectorElementType().getSizeInBits());
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));

  return result;
}

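// Note (LowerINSERT_VECTOR_ELT): SHUFFLE_MASK selects down to one of the
// CBD/CHD/CWD/CDD "generate controls" instructions, which only examine the
// low four bits of their address operand; no memory is touched. Since $sp
// is 16-byte aligned, adding the element's byte offset to it yields exactly
// the insertion mask SHUFB needs to merge the scalar into the vector.
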
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  EVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled i8 math operator");
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}

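// Example (LowerI8Math): an i8 add of %a and %b becomes
//   (truncate:i8 (add:i16 (sign_extend:i16 %a), (sign_extend:i16 %b)))
// since SPU has no 8-bit adder; the same promote-operate-truncate shape is
// used above for sub, shifts, rotates and multiply.
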
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");

    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
      uint64_t SplatBits = APSplatBits.getZExtValue();
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);

      SmallVector<SDValue, 16> tcVec;
      tcVec.assign(16, tc);
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                     &tcVec[0], tcVec.size()));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}

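// Example (LowerByteImmed): (or (v16i8 %x), (splat 0x2a)) is rebuilt with
// the splat expressed as sixteen i8 target constants, the form the
// ANDBI/ORBI/XORBI byte-immediate patterns expect to match.
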
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                  Shift2);

    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}

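// Worked example (LowerCTPOP, i32): for input 0x000f000f, CNTB yields the
// per-byte counts packed as 0x00040004. Sum1 = (word >> 16) + word =
// 0x00040008, Sum2 = (Sum1 >> 8) + Sum1 = 0x00040408, and Sum2 & 0xff = 8,
// the population count.
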
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
 All conversions to i64 are expanded to a libcall.
 */
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
 All conversions from i64 are expanded to a libcall.
 */
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SETCC
/*!
 This handles MVT::f64 (double floating point) condition lowering.
 */
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  DebugLoc dl = Op.getDebugLoc();
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  EVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");

  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
  EVT IntVT(MVT::i64);

  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
  // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
  SDValue lhsHi32 =
          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::SRL, dl, IntVT,
                                  i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
          DAG.getNode(ISD::AND, dl, MVT::i32,
                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);

  // SETO and SETUO only use the lhs operand:
  if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO's result:
    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
    return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
  } else if (CC->get() == ISD::SETUO) {
    // Evaluates to true if Op0 is [SQ]NaN:
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }

  SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
  SDValue rhsHi32 =
          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::SRL, dl, IntVT,
                                  i64rhs, DAG.getConstant(32, MVT::i32)));

  // If a value is negative, subtract from the sign magnitude constant:
  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);

  // Convert the sign-magnitude representation into 2's complement:
  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
          DAG.getNode(ISD::SELECT, dl, IntVT,
                      lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
          DAG.getNode(ISD::SELECT, dl, IntVT,
                      rhsSelectMask, rhsSignMag2TC, i64rhs);

  unsigned compareOp;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETONE:
  case ISD::SETUNE:
    compareOp = ISD::SETNE; break;
  default:
    report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
  }

  SDValue result =
          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                       (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison: also require that neither operand is NaN.
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}

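// Worked example (LowerSETCC): for lhs = -0.0 the high word's sign bit makes
// lhsSelectMask all ones, so the comparison uses 0x8000000000000000 - bits,
// which is 0 -- the same integer that the bits of +0.0 map to. The
// sign-magnitude doubles therefore order correctly as two's complement
// integers, and (setoeq -0.0, +0.0) reduces to the true compare 0 == 0.
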
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  EVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0 and bits from $rB
  // where bits in $mask are 1. The compare result has 1s where the condition
  // was true and 0s where it was false; hence falseval is passed in the $rA
  // slot and trueval in the $rB slot.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}

//! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  // Type to truncate to
  EVT VT = Op.getValueType();
  MVT simpleVT = VT.getSimpleVT();
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  // Type to truncate from
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       Op0, Op0, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue();             // Leave the truncate unmolested
}

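// Note (LowerTRUNCATE): the SPU register file is big-endian, so bytes 8-15
// of an i128 quadword hold its least significant doubleword. The mask words
// (0x08090a0b, 0x0c0d0e0f, ...) move those bytes into bytes 0-7, the
// preferred slot for an i64, before VEC2PREFSLOT extracts the result.
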
/*!
 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
 * algorithm is to duplicate the sign bit using rotmai to generate at
 * least one byte full of sign bits. Then propagate the "sign-byte" into
 * the leftmost words and the i64/i32 into the rightmost words using shufb.
 *
 * @param Op The sext operand
 * @param DAG The current DAG
 * @return The SDValue with the entire instruction sequence
 */
static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
{
  DebugLoc dl = Op.getDebugLoc();

  // Type to extend to
  MVT OpVT = Op.getValueType().getSimpleVT();

  // Type to extend from
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType().getSimpleVT();

  // The type to extend to needs to be a i128 and
  // the type to extend from needs to be i64 or i32.
  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
         "LowerSIGN_EXTEND: input and/or output operand have wrong size");

  // Create shuffle mask
  unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 DAG.getConstant(mask1, MVT::i32),
                                 DAG.getConstant(mask1, MVT::i32),
                                 DAG.getConstant(mask2, MVT::i32),
                                 DAG.getConstant(mask3, MVT::i32));

  // Word wise arithmetic right shift to generate at least one byte
  // that contains sign bits.
  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
  SDValue sraVal = DAG.getNode(ISD::SRA,
                               dl,
                               mvt,
                               DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt,
                                           Op0, Op0),
                               DAG.getConstant(31, MVT::i32));

  // Reinterpret as a i128 (SHUFB requires it). This gets lowered away.
  SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                                dl, Op0VT, Op0,
                                                DAG.getTargetConstant(
                                                  SPU::GPRCRegClass.getID(),
                                                  MVT::i32)), 0);

  // Shuffle bytes - Copy the sign bits into the upper 64 bits
  // and the input value into the lower 64 bits.
  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
                                   extended, sraVal, shufMask);

  return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
}

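// Note (LowerSIGN_EXTEND): in a shufb control word, byte codes 0x00-0x0f
// select from the first operand (extended, the original value) and codes
// 0x10-0x1f from the second (sraVal, the replicated sign words). Thus mask1
// fills the upper quadword bytes with the sign byte, while mask2/mask3 copy
// the original i64 (or i32) into the low-order bytes.
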
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  unsigned Opc = (unsigned) Op.getOpcode();
  EVT VT = Op.getValueType();

  switch (Opc) {
  default: {
#ifndef NDEBUG
    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    Op.getNode()->dump();
#endif
    llvm_unreachable(0);
  }
  case ISD::LOAD:
  case ISD::EXTLOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);

  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;    // Don't fall through into the CTPOP lowering.

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);

  case ISD::SIGN_EXTEND:
    return LowerSIGN_EXTEND(Op, DAG);
  }

  return SDValue();
}

void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  EVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  EVT NodeVT = N->getValueType(0);      // The node's value type
  EVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)
#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                   << "), " << CN0->getSExtValue() << ")\n"
                   << "With:    (SPUindirect <arg>, "
                   << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        N->dump(&DAG);
        errs() << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->isNullValue()) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {
#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHL_BITS:
  case SPUISD::SHL_BYTES:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue())
        Result = Op0;
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT)
          Result = Op000;
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }
#endif

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
SPUTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
    // FIXME: Seems like the supported constraint letters were just copied
    // from PPC, as the following doesn't correspond to the GCC docs.
    // I'm leaving it so until someone adds the corresponding lowering support.
  case 'b':
  case 'r':
  case 'f':
  case 'd':
  case 'v':
  case 'y':
    weight = CW_Register;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
    break;
  }
#endif
}

unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    EVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler:
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue *GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}

// Can we compare to Imm without writing it into a register?
bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // CEQI, CGTI, etc. all take a signed 10-bit (s10) immediate operand.
  return isInt<10>(Imm);
}

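// For example, (icmp sgt %x, 500) folds the immediate directly into CGTI,
// while (icmp sgt %x, 5000) must first materialize 5000 in a register,
// since the s10 field only covers -512 through 511.
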
bool
SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                         const Type *) const {
  // A-form: 18-bit absolute address.
  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
    return true;

  // D-form: reg + 14-bit offset.
  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
    return true;

  // X-form: reg + reg.
  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
    return true;

  return false;
}