1 //===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the SPUTargetLowering class.
12 //===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "llvm/ADT/VectorExtras.h"
18 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
19 #include "llvm/CodeGen/CallingConvLower.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/Constants.h"
26 #include "llvm/Function.h"
27 #include "llvm/Intrinsics.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
// Lazily-populated map from SPUISD opcode to a printable name; filled in on
// the first call to getTargetNodeName().
std::map<unsigned, const char *> node_names;

//! MVT::ValueType mapping to useful data for Cell SPU
// Each entry associates a value type with the byte offset of that type's
// "preferred slot" within a 16-byte SPU register.
struct valtype_map_s {
  const MVT::ValueType valtype;   // value type being described
  const int prefslot_byte;        // byte offset of the preferred slot
// Lookup table searched linearly by getValueTypeMapEntry().
const valtype_map_s valtype_map[] = {
// Number of entries in valtype_map.
const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
//! Find the valtype_map entry for a given value type.
/*!
 \param VT the value type to look up
 Performs a linear scan of valtype_map (the table is small). The cerr
 diagnostic below presumably fires when no entry was found — the guard is on
 a line not visible here; confirm against the full file.
 */
const valtype_map_s *getValueTypeMapEntry(MVT::ValueType VT) {
  const valtype_map_s *retval = 0;

  for (size_t i = 0; i < n_valtype_map; ++i) {
    if (valtype_map[i].valtype == VT) {
      retval = valtype_map + i;
      // Diagnose a missing table entry loudly — this indicates a backend bug.
      cerr << "getValueTypeMapEntry returns NULL for "
           << MVT::getValueTypeString(VT)
//! Predicate that returns true if operand is a memory target
/*!
 \arg Op Operand to test
 \return true if the operand is a memory target (i.e., global
 address, external symbol, constant pool) or an A-form
 address
 */
bool isMemoryOperand(const SDOperand &Op)
  // Frame indices are deliberately excluded — see the commented-out cases.
  const unsigned Opc = Op.getOpcode();
  return (Opc == ISD::GlobalAddress
          || Opc == ISD::GlobalTLSAddress
          /* || Opc == ISD::FrameIndex */
          || Opc == ISD::JumpTable
          || Opc == ISD::ConstantPool
          || Opc == ISD::ExternalSymbol
          || Opc == ISD::TargetGlobalAddress
          || Opc == ISD::TargetGlobalTLSAddress
          /* || Opc == ISD::TargetFrameIndex */
          || Opc == ISD::TargetJumpTable
          || Opc == ISD::TargetConstantPool
          || Opc == ISD::TargetExternalSymbol
          || Opc == SPUISD::AFormAddr);
//! Constructor: describe the SPU's capabilities to SelectionDAG.
// Registers the SPU register classes and records, for each (opcode, type)
// pair the hardware cannot handle natively, how legalization should treat
// it: Legal, Promote, Expand, or Custom (handled in LowerOperation).
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),

  // Fold away setcc operations if possible.

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  // NOTE: i8 register class is not registered because we cannot determine when
  // we need to zero or sign extend for custom-lowered loads and stores.
  // NOTE: Ignore the previous note. For now. :-)
  addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD, MVT::i1, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setStoreXAction(MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setStoreXAction(MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant, MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
    setOperationAction(ISD::LOAD, sctype, Custom);
    setOperationAction(ISD::STORE, sctype, Custom);

  // SPU supports BRCOND, although DAGCombine will convert BRCONDs
  // into BR_CCs. BR_CC instructions are custom selected in
  setOperationAction(ISD::BRCOND, MVT::Other, Legal);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.
  setOperationAction(ISD::ROTR, MVT::i32, Legal);
  setOperationAction(ISD::ROTR, MVT::i16, Legal);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL, MVT::i32, Custom);

  // Need to custom handle (some) common i8 math ops
  setOperationAction(ISD::SUB, MVT::i8, Custom);
  setOperationAction(ISD::MUL, MVT::i8, Custom);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8, Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  setOperationAction(ISD::CTLZ , MVT::i32, Legal);

  // SPU does not have select or setcc
  setOperationAction(ISD::SELECT, MVT::i1, Expand);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  setOperationAction(ISD::SETCC, MVT::i1, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Expand);
  setOperationAction(ISD::SETCC, MVT::i16, Expand);
  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::f64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): these repeat the FP_TO_SINT/SINT_TO_FP i64 "Custom"
  // settings made earlier; redundant but harmless.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // NOTE(review): overrides the earlier "Legal" setting for this pair — the
  // later call wins.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Custom);

    setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Legal);
    setOperationAction(ISD::STORE, (MVT::ValueType)VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::ConstantPool, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);

  // v16i8 refinements: these override the generic per-VT settings above.
  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR, MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setSetCCResultType(MVT::i32);
  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  // e.g., setTargetDAGCombine(ISD::SUB);

  computeRegisterProperties();
//! Return the printable name of a target-specific (SPUISD) DAG node opcode.
/*!
 \param Opcode the SPUISD opcode to name
 \return the node's name string, or null if the opcode is unknown.

 NOTE(review): node_names is a file-level global populated lazily on the
 first call with no locking; assumed safe because codegen runs
 single-threaded — confirm.
 */
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::DFormAddr] = "SPUISD::DFormAddr";
    node_names[(unsigned) SPUISD::XFormAddr] = "SPUISD::XFormAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] = "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_Z] =
      "SPUISD::ROTBYTES_RIGHT_Z";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::FSMBI] = "SPUISD::FSMBI";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::SFPConstant] = "SPUISD::SFPConstant";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  // Unknown opcodes yield a null name rather than asserting.
  return ((i != node_names.end()) ? i->second : 0);
424 //===----------------------------------------------------------------------===//
425 // Calling convention code:
426 //===----------------------------------------------------------------------===//
428 #include "SPUGenCallingConv.inc"
430 //===----------------------------------------------------------------------===//
431 // LowerOperation implementation
432 //===----------------------------------------------------------------------===//
/// Aligned load common code for CellSPU
 \param[in] Op The SelectionDAG load or store operand
 \param[in] DAG The selection DAG
 \param[in] ST CellSPU subtarget information structure
 \param[in,out] alignment Caller initializes this to the load or store node's
 value from getAlignment(), may be updated while generating the aligned load
 \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
 offset (divisible by 16, modulo 16 == 0)
 \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
 offset of the preferred slot (modulo 16 != 0)
 \param[in,out] VT Caller initializes this value type to the the load or store
 node's loaded or stored value type; may be updated if an i1-extended load or
 \param[out] was16aligned true if the base pointer had 16-byte alignment,
 otherwise false. Can help to determine if the chunk needs to be rotated.

 Both load and store lowering load a block of data aligned on a 16-byte
 boundary. This is the common aligned load code shared between both.
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            unsigned &VT, bool &was16aligned)
  // LSN is the load/store node being lowered — presumably a parameter
  // declared on a line not shown in this view; confirm against full file.
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    // base + constant offset: peel off the offset so we can align it.
    SDOperand Op1 = basePtr.Val->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.Val->getOperand(1));

      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Modify alignment, since the ADD is likely from getElementPtr:
      switch (basePtr.getOpcode()) {
      case ISD::GlobalAddress:
      case ISD::TargetGlobalAddress: {
        GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(basePtr.Val);
        const GlobalValue *GV = GN->getGlobal();
        alignment = GV->getAlignment();

    // No constant offset: rotate by the negated preferred-slot byte only.
    prefSlotOffs = -vtm->prefslot_byte;

    prefSlotOffs = -vtm->prefslot_byte;

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      if (isMemoryOperand(basePtr)) {
        SDOperand Zero = DAG.getConstant(0, PtrVT);
        unsigned Opc = (!ST->usingLargeMem()
                        : SPUISD::XFormAddr);
        basePtr = DAG.getNode(Opc, PtrVT, basePtr, Zero);
        basePtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                              basePtr, DAG.getConstant((alignOffs & ~0xf), PtrVT));

    // Emit the vector load:
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::XFormAddr, PtrVT, basePtr, DAG.getConstant(0, PtrVT));

  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, DAG.getConstant(alignOffs, PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
/// Custom lower loads for CellSPU
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT::ValueType VT = LN->getLoadedVT();     // type loaded from memory
  MVT::ValueType OpVT = Op.Val->getValueType(0);  // type the node produces
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();

  // For an extending load of an i1 variable, just call it i8 (or whatever we
  // were passed) and make it zero-extended:
  ExtType = ISD::ZEXTLOAD;

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // Load the enclosing 16-byte chunk (see AlignedLoad above).
    AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, VT, was16aligned);

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
    SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
    Ops[2] = DAG.getConstant(rotamt, MVT::i16);
    LoadSDNode *LN1 = cast<LoadSDNode>(result);
    Ops[2] = LN1->getBasePtr();
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
    the_chain = result.getValue(1);

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      MVT::ValueType vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
      vecVT = MVT::getVectorType(OpVT, (128 / MVT::getSizeInBits(OpVT)));
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);

    // Handle the sign and zero-extending loads for i1 and i8:

    if (ExtType == ISD::SEXTLOAD) {
      NewOpC = (OpVT == MVT::i1
                ? SPUISD::EXTRACT_I1_SEXT
                : SPUISD::EXTRACT_I8_SEXT);
      assert(ExtType == ISD::ZEXTLOAD);
      NewOpC = (OpVT == MVT::i1
                ? SPUISD::EXTRACT_I1_ZEXT
                : SPUISD::EXTRACT_I8_ZEXT);

    result = DAG.getNode(NewOpC, OpVT, result);

    // Package value + chain as an LDRESULT so both are returned.
    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = { result, the_chain };

    result = DAG.getNode(SPUISD::LDRESULT, retvts, retops, 2);

  // Indexed addressing modes are not supported by this lowering.
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) LN->getAddressingMode() << "\n";
/// Custom lower stores for CellSPU
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT::ValueType VT = Value.getValueType();
  // For truncating stores, StVT is the narrower in-memory type.
  MVT::ValueType StVT = (!SN->isTruncatingStore() ? VT : SN->getStoredVT());
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    unsigned vecVT, stVecVT = MVT::v16i8;

    stVecVT = MVT::getVectorType(StVT, (128 / MVT::getSizeInBits(StVT)));
    vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));

    // Read-modify-write: first load the enclosing 16-byte chunk.
    SDOperand alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.Val == 0)

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDOperand basePtr = LN->getBasePtr();
    SDOperand the_chain = alignLoadVec.getValue(1);
    SDOperand theValue = SN->getValue();

        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      theValue = theValue.getOperand(0);

    // Convert the byte offset into an element index for INSERT_MASK.
    chunk_offset /= (MVT::getSizeInBits(StVT == MVT::i1 ? (unsigned) MVT::i8 : StVT) / 8);

    SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDOperand insertEltPtr;
    SDOperand insertEltOp;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    if (basePtr.getOpcode() == SPUISD::DFormAddr) {
      insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                                 basePtr.getOperand(0),
      insertEltPtr = DAG.getNode(SPUISD::DFormAddr, PtrVT,
                                 DAG.getRegister(SPU::R1, PtrVT),

    // Shuffle the new value into the loaded chunk, then store it back.
    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

  case ISD::LAST_INDEXED_MODE:
    // NOTE(review): the message says "LowerLOAD"/"LoadSDNode" but this is
    // the store path operating on a StoreSDNode — looks like a copy/paste
    // error in the diagnostic text; fix the string in a follow-up.
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
    cerr << (unsigned) SN->getAddressingMode() << "\n";
/// Generate the address of a constant pool entry.
/*!
 Only the static relocation model is supported. Small-memory code materializes
 the address as a hi/lo pair; large-memory code uses an X-form address.
 */
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      // Generate hi/lo address pair
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);

      return DAG.getNode(ISD::ADD, PtrVT, Lo, Hi);

      // Large memory model: single X-form address node.
      return DAG.getNode(SPUISD::XFormAddr, PtrVT, CPI, Zero);

         "LowerConstantPool: Relocation model other than static not supported.");
//! Generate the address of a jump table entry.
// Mirrors LowerConstantPool: static relocation model only; large-memory
// code goes through an X-form address.
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    return (!ST->usingLargeMem()
            : DAG.getNode(SPUISD::XFormAddr, PtrVT, JTI, Zero));

         "LowerJumpTable: Relocation model other than static not supported.");
//! Generate the address of a global value.
// Same pattern as LowerConstantPool/LowerJumpTable: static relocation
// model only; large-memory code uses an X-form address.
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    return (!ST->usingLargeMem()
            : DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero));

  cerr << "LowerGlobalAddress: Relocation model other than static not "
//! Custom lower i64 integer constants
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

  if (VT == MVT::i64) {
    // Splat the constant into a v2i64 vector, then read back element 0 —
    // this is how a 64-bit immediate is materialized on SPU.
    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));

  // Any other constant type reaching here is a lowering bug.
  cerr << "LowerConstant: unhandled constant type "
       << MVT::getValueTypeString(VT)
//! Custom lower single precision floating point constants
 "float" immediates can be lowered as if they were unsigned 32-bit integers.
 The SPUISD::SFPConstant pseudo-instruction handles this in the instruction
LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
  unsigned VT = Op.getValueType();
  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);

         "LowerConstantFP: Node is not ConstantFPSDNode");

  if (VT == MVT::f32) {
    // f32: emit directly as an SFPConstant carrying the float payload.
    float targetConst = FP->getValueAPF().convertToFloat();
    return DAG.getNode(SPUISD::SFPConstant, VT,
                       DAG.getTargetConstantFP(targetConst, VT));
  } else if (VT == MVT::f64) {
    // f64: reuse the i64 constant lowering on the raw bit pattern, then
    // bitcast the materialized integer back to double.
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
// LowerFORMAL_ARGUMENTS - Lower a function's incoming arguments for the SPU:
// register-passed arguments are copied out of the SPU argument registers
// (SPURegisterInfo::getArgRegs()) into fresh virtual registers; arguments that
// fall off the register list are loaded from fixed stack slots. For vararg
// functions, the remaining argument registers are spilled to the stack so
// va_arg can walk them. Returns a MERGE_VALUES node carrying one value per
// formal argument plus the updated chain.
// NOTE(review): this is a numbered listing with elided lines; the comments
// below describe only the statements that are visible here.
869 LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
871 MachineFunction &MF = DAG.getMachineFunction();
872 MachineFrameInfo *MFI = MF.getFrameInfo();
873 MachineRegisterInfo &RegInfo = MF.getRegInfo();
874 SmallVector<SDOperand, 8> ArgValues;
// Operand 0 is the incoming chain; operand 2 is the isVarArg flag encoded as
// a constant node by the FORMAL_ARGUMENTS builder.
875 SDOperand Root = Op.getOperand(0);
876 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
878 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
879 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
// Stack-passed arguments start just past the minimal linkage area.
881 unsigned ArgOffset = SPUFrameInfo::minStackSize();
882 unsigned ArgRegIdx = 0;
883 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
885 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
887 // Add DAG nodes to load the arguments or copy them out of registers.
// The last result value of the FORMAL_ARGUMENTS node is the chain, hence
// getNumValues()-1 iterations.
888 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
890 bool needsLoad = false;
891 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
892 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;
896 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
897 << MVT::getValueTypeString(ObjectVT)
// Per-type cases: copy the argument out of the next SPU argument register
// into a virtual register of the matching register class.
// NOTE(review): the `!isVarArg` test forces *every* named argument of a
// vararg function down the stack-load path; confirm this is the intended
// vararg ABI handling rather than an inverted condition.
902 if (!isVarArg && ArgRegIdx < NumArgRegs) {
903 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
904 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
905 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
912 if (!isVarArg && ArgRegIdx < NumArgRegs) {
913 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
914 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
915 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
922 if (!isVarArg && ArgRegIdx < NumArgRegs) {
923 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
924 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
925 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
932 if (!isVarArg && ArgRegIdx < NumArgRegs) {
933 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
934 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
935 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
942 if (!isVarArg && ArgRegIdx < NumArgRegs) {
943 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
944 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
945 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
952 if (!isVarArg && ArgRegIdx < NumArgRegs) {
953 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
954 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
955 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
// Vector arguments use the full vector register class and keep ObjectVT.
966 if (!isVarArg && ArgRegIdx < NumArgRegs) {
967 unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
968 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
969 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
977 // We need to load the argument to a virtual register if we determined above
978 // that we ran out of physical registers of the appropriate type
980 // If the argument is actually used, emit a load from the right stack
982 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
983 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
984 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
985 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
987 // Don't emit a dead load.
988 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
991 ArgOffset += StackSlotSize;
994 ArgValues.push_back(ArgVal);
997 // If the function takes variable number of arguments, make a frame index for
998 // the start of the first vararg value... for expansion of llvm.va_start.
1000 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
1002 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
1003 // If this function is vararg, store any remaining integer argument regs to
1004 // their spots on the stack so that they may be loaded by dereferencing the
1005 // result of va_next.
1006 SmallVector<SDOperand, 8> MemOps;
1007 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1008 unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
1009 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
1010 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
1011 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1012 MemOps.push_back(Store);
1013 // Increment the address by four for the next argument to store
1014 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
1015 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
// Chain all the vararg spill stores together so they are ordered before any
// use of the incoming chain.
1017 if (!MemOps.empty())
1018 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
1021 ArgValues.push_back(Root);
1023 // Return the new list of results.
1024 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
1025 Op.Val->value_end());
1026 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
1029 /// isLSAAddress - Return the immediate to use if the specified
1030 /// value is representable as a LSA address.
// Returns the word-address constant node (byte address >> 2) if Op is a
// 4-byte-aligned constant whose value fits in an 18-bit sign-extended
// immediate; otherwise returns 0 (elided early-return for the non-constant
// case is not visible in this listing).
1031 static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
1032 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1035 int Addr = C->getValue();
// NOTE(review): `(Addr << 14 >> 14)` relies on implementation-defined signed
// left-shift/arithmetic right-shift behavior to test sign-extension of the
// low 18 bits — works on the compilers LLVM supports, but is not portable C++.
1036 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
1037 (Addr << 14 >> 14) != Addr)
1038 return 0; // Top 14 bits have to be sext of immediate.
1040 return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
// LowerCALL - Lower an outgoing call for the SPU: distribute arguments into
// the SPU argument registers (overflow goes to stack slots just past the
// linkage area), classify the callee (PC-relative BRSL, absolute BRASL/A-form,
// X-form for large-memory mode, or a munged LSA constant), emit the
// CALLSEQ_START/CALL/CALLSEQ_END sequence, and copy return values out of
// R3 (and R4 for two-register i32 results).
// NOTE(review): this is a numbered listing with elided lines (case labels and
// closing braces are missing); comments describe only visible statements.
1045 LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1046 SDOperand Chain = Op.getOperand(0);
1048 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
// NOTE(review): isTailCall is computed but no visible line consumes it —
// tail calls appear to be lowered as ordinary calls; confirm intended.
1049 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1051 SDOperand Callee = Op.getOperand(4);
// CALL operands are (chain, cc, isVarArg, isTailCall, callee, arg/sign
// pairs...): hence (#operands - 5) / 2 actual arguments.
1052 unsigned NumOps = (Op.getNumOperands() - 5) / 2;
1053 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1054 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1055 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1057 // Handy pointer type
1058 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1060 // Accumulate how many bytes are to be pushed on the stack, including the
1061 // linkage area, and parameter passing area. According to the SPU ABI,
1062 // we minimally need space for [LR] and [SP]
1063 unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1065 // Set up a copy of the stack pointer for use loading and storing any
1066 // arguments that may not fit in the registers available for argument
1068 SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1070 // Figure out which arguments are going to go in registers, and which in
1072 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1073 unsigned ArgRegIdx = 0;
1075 // Keep track of registers passing arguments
1076 std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1077 // And the arguments passed on the stack
1078 SmallVector<SDOperand, 8> MemOpChains;
1080 for (unsigned i = 0; i != NumOps; ++i) {
// Argument i lives at operand 5 + 2*i (the +1 slot is its sign/zero flag).
1081 SDOperand Arg = Op.getOperand(5+2*i);
1083 // PtrOff will be used to store the current argument to the stack if a
1084 // register cannot be found for it.
1085 SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1086 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);
1088 switch (Arg.getValueType()) {
1089 default: assert(0 && "Unexpected ValueType for argument!");
// Each visible case: prefer the next free argument register, else store the
// argument to its stack slot and advance the running stack offset.
1093 if (ArgRegIdx != NumArgRegs) {
1094 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1096 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1097 ArgOffset += StackSlotSize;
1102 if (ArgRegIdx != NumArgRegs) {
1103 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1105 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1106 ArgOffset += StackSlotSize;
1113 if (ArgRegIdx != NumArgRegs) {
1114 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1116 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1117 ArgOffset += StackSlotSize;
1123 // Update number of stack bytes actually used, insert a call sequence start
1124 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1125 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));
1127 if (!MemOpChains.empty()) {
1128 // Adjust the stack pointer for the stack arguments.
1129 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1130 &MemOpChains[0], MemOpChains.size());
1133 // Build a sequence of copy-to-reg nodes chained together with token chain
1134 // and flag operands which copy the outgoing args into the appropriate regs.
1136 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1137 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1139 InFlag = Chain.getValue(1);
1142 std::vector<MVT::ValueType> NodeTys;
1143 NodeTys.push_back(MVT::Other); // Returns a chain
1144 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use.
1146 SmallVector<SDOperand, 8> Ops;
1147 unsigned CallOpc = SPUISD::CALL;
1149 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1150 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1151 // node so that legalize doesn't hack it.
1152 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1153 GlobalValue *GV = G->getGlobal();
1154 unsigned CalleeVT = Callee.getValueType();
1155 SDOperand Zero = DAG.getConstant(0, PtrVT);
1156 SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1158 if (!ST->usingLargeMem()) {
1159 // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1160 // style calls, otherwise, external symbols are BRASL calls. This assumes
1161 // that declared/defined symbols are in the same compilation unit and can
1162 // be reached through PC-relative jumps.
1165 // This may be an unsafe assumption for JIT and really large compilation
1167 if (GV->isDeclaration()) {
1168 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
1170 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
1173 // "Large memory" mode: Turn all calls into indirect calls with a X-form
1175 Callee = DAG.getNode(SPUISD::XFormAddr, PtrVT, GA, Zero);
1177 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1178 Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
1179 else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1180 // If this is an absolute destination address that appears to be a legal
1181 // local store address, use the munged value.
1182 Callee = SDOperand(Dest, 0);
1185 Ops.push_back(Chain);
1186 Ops.push_back(Callee);
1188 // Add argument registers to the end of the list so that they are known live
1190 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1191 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1192 RegsToPass[i].second.getValueType()));
1195 Ops.push_back(InFlag);
1196 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
1197 InFlag = Chain.getValue(1);
1199 SDOperand ResultVals[3];
1200 unsigned NumResults = 0;
1203 // If the call has results, copy the values out of the ret val registers.
1204 switch (Op.Val->getValueType(0)) {
1205 default: assert(0 && "Unexpected ret value!");
1206 case MVT::Other: break;
// Two-value i32 result (e.g. an expanded i64): low part in R4, high in R3.
1208 if (Op.Val->getValueType(1) == MVT::i32) {
1209 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
1210 ResultVals[0] = Chain.getValue(0);
1211 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
1212 Chain.getValue(2)).getValue(1);
1213 ResultVals[1] = Chain.getValue(0);
1215 NodeTys.push_back(MVT::i32);
1217 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
1218 ResultVals[0] = Chain.getValue(0);
1221 NodeTys.push_back(MVT::i32);
1224 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
1225 ResultVals[0] = Chain.getValue(0);
1227 NodeTys.push_back(MVT::i64);
1231 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1232 InFlag).getValue(1);
1233 ResultVals[0] = Chain.getValue(0);
1235 NodeTys.push_back(Op.Val->getValueType(0));
1242 Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
1243 InFlag).getValue(1);
1244 ResultVals[0] = Chain.getValue(0);
1246 NodeTys.push_back(Op.Val->getValueType(0));
1250 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1251 DAG.getConstant(NumStackBytes, PtrVT));
1252 NodeTys.push_back(MVT::Other);
1254 // If the function returns void, just return the chain.
1255 if (NumResults == 0)
1258 // Otherwise, merge everything together with a MERGE_VALUES node.
1259 ResultVals[NumResults++] = Chain;
1260 SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1261 ResultVals, NumResults);
1262 return Res.getValue(Op.ResNo);
// LowerRET - Lower an ISD::RET for the SPU: run the return-value calling
// convention (RetCC_SPU) over the returned operands, mark the chosen
// registers live-out on first use, copy each returned value into its
// assigned register (flag-chained), and emit SPUISD::RET_FLAG.
// NOTE(review): numbered listing with elided lines — the declaration of
// `Flag` and the `if (Flag.Val)` guard before the two returns are not
// visible here.
1266 LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
1267 SmallVector<CCValAssign, 16> RVLocs;
1268 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1269 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1270 CCState CCInfo(CC, isVarArg, TM, RVLocs);
1271 CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);
1273 // If this is the first return lowered for this function, add the regs to the
1274 // liveout set for the function.
1275 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1276 for (unsigned i = 0; i != RVLocs.size(); ++i)
1277 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1280 SDOperand Chain = Op.getOperand(0);
1283 // Copy the result values into the output registers.
1284 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1285 CCValAssign &VA = RVLocs[i];
1286 assert(VA.isRegLoc() && "Can only return in registers!");
// RET operands alternate (value, signness); value i is operand i*2+1.
1287 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1288 Flag = Chain.getValue(1);
1292 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1294 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1298 //===----------------------------------------------------------------------===//
1299 // Vector related lowering:
1300 //===----------------------------------------------------------------------===//
// getVecImm - If the BUILD_VECTOR node N has a single non-undef value
// replicated across all of its (non-undef) elements and that value is a
// ConstantSDNode, return it; otherwise return 0 (all-undef vectors, mixed
// values, and non-constant splats all yield 0).
1302 static ConstantSDNode *
1303 getVecImm(SDNode *N) {
1304 SDOperand OpVal(0, 0);
1306 // Check to see if this buildvec has a single non-undef value in its elements.
1307 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1308 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
// First non-undef element is remembered; any later mismatch disqualifies
// (the guard and early return between these lines are elided in the listing).
1310 OpVal = N->getOperand(i);
1311 else if (OpVal != N->getOperand(i))
1315 if (OpVal.Val != 0) {
1316 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1321 return 0; // All UNDEF: use implicit def.; not Constant node
1324 /// get_vec_u18imm - Test if this vector is a vector filled with the same value
1325 /// and the value fits into an unsigned 18-bit constant, and if so, return the
// (Returns the constant as an SDOperand of ValueType; the fall-through
// "return SDOperand()" for the failure case is elided in this listing.)
1327 SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1328 MVT::ValueType ValueType) {
1329 if (ConstantSDNode *CN = getVecImm(N)) {
1330 uint64_t Value = CN->getValue();
// 0x3ffff == 2^18 - 1: the SPU's unsigned 18-bit immediate range.
1331 if (Value <= 0x3ffff)
1332 return DAG.getConstant(Value, ValueType);
1338 /// get_vec_i16imm - Test if this vector is a vector filled with the same value
1339 /// and the value fits into a signed 16-bit constant, and if so, return the
// For each element width the splat value is masked to 16 bits and
// sign-extended; the constant qualifies only if sign-extension reproduces
// the original value. (Failure return is elided in this listing.)
1341 SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1342 MVT::ValueType ValueType) {
1343 if (ConstantSDNode *CN = getVecImm(N)) {
1344 if (ValueType == MVT::i32) {
1345 int Value = (int) CN->getValue();
// Shift-left-then-arithmetic-right sign-extends the low 16 bits.
1346 int SExtValue = ((Value & 0xffff) << 16) >> 16;
1348 if (Value == SExtValue)
1349 return DAG.getConstant(Value, ValueType);
1350 } else if (ValueType == MVT::i16) {
1351 short Value = (short) CN->getValue();
1352 int SExtValue = ((int) Value << 16) >> 16;
1354 if (Value == (short) SExtValue)
1355 return DAG.getConstant(Value, ValueType);
1356 } else if (ValueType == MVT::i64) {
1357 int64_t Value = CN->getValue();
1358 int64_t SExtValue = ((Value & 0xffff) << (64 - 16)) >> (64 - 16);
1360 if (Value == SExtValue)
1361 return DAG.getConstant(Value, ValueType);
1368 /// get_vec_i10imm - Test if this vector is a vector filled with the same value
1369 /// and the value fits into a signed 10-bit constant, and if so, return the
// Delegates the signed-10-bit range check to isS10Constant; only i32 and
// i16 element types are accepted. (Failure return elided in this listing.)
1371 SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1372 MVT::ValueType ValueType) {
1373 if (ConstantSDNode *CN = getVecImm(N)) {
1374 int Value = (int) CN->getValue();
1375 if ((ValueType == MVT::i32 && isS10Constant(Value))
1376 || (ValueType == MVT::i16 && isS10Constant((short) Value)))
1377 return DAG.getConstant(Value, ValueType);
1383 /// get_vec_i8imm - Test if this vector is a vector filled with the same value
1384 /// and the value fits into a signed 8-bit constant, and if so, return the
1387 /// @note: The incoming vector is v16i8 because that's the only way we can load
1388 /// constant vectors. Thus, we test to see if the upper and lower bytes are the
// (Failure return elided in this listing.)
1390 SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1391 MVT::ValueType ValueType) {
1392 if (ConstantSDNode *CN = getVecImm(N)) {
1393 int Value = (int) CN->getValue();
// i16 splat of a duplicated byte: accept only if the high byte equals the
// low byte, and hand back just the byte value.
1394 if (ValueType == MVT::i16
1395 && Value <= 0xffff /* truncated from uint64_t */
1396 && ((short) Value >> 8) == ((short) Value & 0xff))
1397 return DAG.getConstant(Value & 0xff, ValueType);
1398 else if (ValueType == MVT::i8
1399 && (Value & 0xff) == Value)
1400 return DAG.getConstant(Value, ValueType);
1406 /// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1407 /// and the value fits into a signed 16-bit constant, and if so, return the
// NOTE(review): despite the wording above, the visible test checks that only
// the *upper* halfword bits (0xffff0000) are set — i.e. the splat is
// loadable with ILHU (immediate-load-halfword-upper) — and returns the
// value shifted down by 16. Confirm and reword the doc comment upstream.
1409 SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1410 MVT::ValueType ValueType) {
1411 if (ConstantSDNode *CN = getVecImm(N)) {
1412 uint64_t Value = CN->getValue();
1413 if ((ValueType == MVT::i32
1414 && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1415 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1416 return DAG.getConstant(Value >> 16, ValueType);
1422 /// get_v4i32_imm - Catch-all for general 32-bit constant vectors
// Returns the splat value of a constant v4i32 BUILD_VECTOR as an i32
// constant, or (elided here) an empty SDOperand if N is not a constant splat.
1423 SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1424 if (ConstantSDNode *CN = getVecImm(N)) {
1425 return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1431 /// get_v2i64_imm - Catch-all for general 64-bit constant vectors
// NOTE(review): the `(unsigned)` cast truncates the splat constant to its
// low 32 bits before widening back to i64 — for a 64-bit catch-all this
// looks like a bug (cf. get_v4i32_imm above, where the same cast is
// correct); it should presumably pass CN->getValue() through unmodified.
// Cannot be fixed here because this listing elides surrounding lines.
1432 SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1433 if (ConstantSDNode *CN = getVecImm(N)) {
1434 return DAG.getConstant((unsigned) CN->getValue(), MVT::i64);
1440 // If this is a vector of constants or undefs, get the bits. A bit in
1441 // UndefBits is set if the corresponding element of the vector is an
1442 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1443 // zero. Return true if this is not an array of constants, false if it is.
// Packs a 128-bit BUILD_VECTOR into two uint64_t words (VectorBits[0] =
// high half by element order, per PartNo below). Note the inverted return
// convention: true means FAILURE (non-constant element seen).
1445 static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1446 uint64_t UndefBits[2]) {
1447 // Start with zero'd results.
1448 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1450 unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
1451 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1452 SDOperand OpVal = BV->getOperand(i);
1454 unsigned PartNo = i >= e/2; // In the upper 128 bits?
1455 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t.
1457 uint64_t EltBits = 0;
1458 if (OpVal.getOpcode() == ISD::UNDEF) {
// Mark every bit of this element's slot as undef; its value stays zero.
1459 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1460 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1462 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1463 EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1464 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
// FP elements are stored via their IEEE bit patterns.
1465 const APFloat &apf = CN->getValueAPF();
1466 EltBits = (CN->getValueType(0) == MVT::f32
1467 ? FloatToBits(apf.convertToFloat())
1468 : DoubleToBits(apf.convertToDouble()));
1470 // Nonconstant element.
1474 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1477 //printf("%llx %llx %llx %llx\n",
1478 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1482 /// If this is a splat (repetition) of a value across the whole vector, return
1483 /// the smallest size that splats it. For example, "0x01010101010101..." is a
1484 /// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and
1485 /// SplatSize = 1 byte.
// Successively folds 128 -> 64 -> 32 -> 16 -> 8 bits, at each level OR-ing
// value bits and AND-ing undef bits so that undef lanes never block a match.
// MinSplatBits (elided parameter line) caps how small the splat may get.
// NOTE(review): the SplatSize assignments and several closing braces are
// elided in this listing; comments describe visible statements only.
1486 static bool isConstantSplat(const uint64_t Bits128[2],
1487 const uint64_t Undef128[2],
1489 uint64_t &SplatBits, uint64_t &SplatUndef,
1491 // Don't let undefs prevent splats from matching. See if the top 64-bits are
1492 // the same as the lower 64-bits, ignoring undefs.
1493 uint64_t Bits64 = Bits128[0] | Bits128[1];
1494 uint64_t Undef64 = Undef128[0] & Undef128[1];
1495 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1496 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1497 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
1498 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
// Halves match wherever the other half is defined => at least a 64-bit splat.
1500 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1501 if (MinSplatBits < 64) {
1503 // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1505 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1506 if (MinSplatBits < 32) {
1508 // If the top 16-bits are different than the lower 16-bits, ignoring
1509 // undefs, we have an i32 splat.
1510 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1511 if (MinSplatBits < 16) {
1512 // If the top 8-bits are different than the lower 8-bits, ignoring
1513 // undefs, we have an i16 splat.
1514 if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1515 // Otherwise, we have an 8-bit splat.
1516 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8);
1517 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1523 SplatUndef = Undef16;
1530 SplatUndef = Undef32;
1536 SplatBits = Bits128[0];
1537 SplatUndef = Undef128[0];
1543 return false; // Can't be a splat if two pieces don't match.
1546 // If this is a case we can't handle, return null and let the default
1547 // expansion code take care of it. If we CAN select this case, and if it
1548 // selects to a single instruction, return Op. Otherwise, if we can codegen
1549 // this case more efficiently than a constant pool load, lower it to the
1550 // sequence of ops that should be used.
// LowerBUILD_VECTOR - Lower constant-splat BUILD_VECTORs for the SPU:
// non-splat / non-constant vectors bail out with an empty SDOperand so the
// default expansion handles them. FP splats are re-expressed as integer
// splats plus BIT_CONVERT; v2i64 splats with "special" 32-bit halves
// (0, ~0, or sign-bit-only) are synthesized with a SHUFB byte-shuffle.
// NOTE(review): numbered listing with elided case labels/braces; comments
// describe visible statements only.
1551 static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1552 MVT::ValueType VT = Op.getValueType();
1553 // If this is a vector of constants or undefs, get the bits. A bit in
1554 // UndefBits is set if the corresponding element of the vector is an
1555 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
1557 uint64_t VectorBits[2];
1558 uint64_t UndefBits[2];
1559 uint64_t SplatBits, SplatUndef;
// GetConstantBuildVectorBits returns true on FAILURE (see its definition).
1561 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1562 || !isConstantSplat(VectorBits, UndefBits,
1563 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
1564 SplatBits, SplatUndef, SplatSize))
1565 return SDOperand(); // Not a constant vector, not a splat.
// v4f32 case: treat the 32-bit splat pattern as an integer and bitcast.
1570 uint32_t Value32 = SplatBits;
1571 assert(SplatSize == 4
1572 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1573 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1574 SDOperand T = DAG.getConstant(Value32, MVT::i32);
1575 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1576 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
// v2f64 case: same trick with a 64-bit integer splat.
1580 uint64_t f64val = SplatBits;
1581 assert(SplatSize == 8
1582 && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1583 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1584 SDOperand T = DAG.getConstant(f64val, MVT::i64);
1585 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1586 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1590 // 8-bit constants have to be expanded to 16-bits
1591 unsigned short Value16 = SplatBits | (SplatBits << 8);
1593 for (int i = 0; i < 8; ++i)
1594 Ops[i] = DAG.getConstant(Value16, MVT::i16);
1595 return DAG.getNode(ISD::BIT_CONVERT, VT,
1596 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
// i16 splat: if the splat unit was a single byte, duplicate it into both
// halves; otherwise use the 16-bit pattern directly.
1599 unsigned short Value16;
1601 Value16 = (unsigned short) (SplatBits & 0xffff);
1603 Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1604 SDOperand T = DAG.getConstant(Value16, MVT::getVectorElementType(VT));
1606 for (int i = 0; i < 8; ++i) Ops[i] = T;
1607 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1610 unsigned int Value = SplatBits;
1611 SDOperand T = DAG.getConstant(Value, MVT::getVectorElementType(VT));
1612 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
// v2i64 case: split the 64-bit splat into 32-bit halves and build it with
// a byte shuffle of two v4i32 splats.
1615 uint64_t val = SplatBits;
1616 uint32_t upper = uint32_t(val >> 32);
1617 uint32_t lower = uint32_t(val);
1622 SmallVector<SDOperand, 16> ShufBytes;
1624 bool upper_special, lower_special;
1626 // NOTE: This code creates common-case shuffle masks that can be easily
1627 // detected as common expressions. It is not attempting to create highly
1628 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1630 // Detect if the upper or lower half is a special shuffle mask pattern:
1631 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1632 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1634 // Create lower vector if not a special pattern
1635 if (!lower_special) {
1636 SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1637 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1638 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1639 LO32C, LO32C, LO32C, LO32C));
1642 // Create upper vector if not a special pattern
1643 if (!upper_special) {
1644 SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1645 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1646 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1647 HI32C, HI32C, HI32C, HI32C));
1650 // If either upper or lower are special, then the two input operands are
1651 // the same (basically, one of them is a "don't care")
1656 if (lower_special && upper_special) {
1657 // Unhappy situation... both upper and lower are special, so punt with
1658 // a target constant:
1659 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1660 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
// Build a 16-byte SHUFB control mask, word (i) by byte (j). Even words
// select the upper half, odd words the lower half of each i64 lane.
1664 for (int i = 0; i < 4; ++i) {
1665 for (int j = 0; j < 4; ++j) {
1667 bool process_upper, process_lower;
1670 process_upper = (upper_special && (i & 1) == 0);
1671 process_lower = (lower_special && (i & 1) == 1);
1673 if (process_upper || process_lower) {
// Special patterns map to shufb generator bytes: 0x80 = zero,
// 0xc0 = 0xff fill, 0xe0 = 0x80 fill (elided val assignments here).
1674 if ((process_upper && upper == 0)
1675 || (process_lower && lower == 0))
1677 else if ((process_upper && upper == 0xffffffff)
1678 || (process_lower && lower == 0xffffffff))
1680 else if ((process_upper && upper == 0x80000000)
1681 || (process_lower && lower == 0x80000000))
1682 val = (j == 0 ? 0xe0 : 0x80);
// Non-special bytes index into the concatenated HI32|LO32 inputs.
1684 val = i * 4 + j + ((i & 1) * 16);
1686 ShufBytes.push_back(DAG.getConstant(val, MVT::i8));
1690 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1691 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1692 &ShufBytes[0], ShufBytes.size()));
1694 // For zero, this can be lowered efficiently via v4i32 BUILD_VECTOR
1695 SDOperand Zero = DAG.getConstant(0, MVT::i32);
1696 return DAG.getNode(ISD::BIT_CONVERT, VT,
1697 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1698 Zero, Zero, Zero, Zero));
1706 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1707 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1708 /// permutation vector, V3, is monotonically increasing with one "exception"
1709 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1710 /// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1711 /// In either case, the net result is going to eventually invoke SHUFB to
1712 /// permute/shuffle the bytes from V1 and V2.
1714 /// INSERT_MASK is eventually selected as one of the C*D instructions, generate
1715 /// control word for byte/halfword/word insertion. This takes care of a single
1716 /// element move from V2 into V1.
1718 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
// NOTE(review): numbered listing with elided lines (V2EltIdx0 assignments,
// loop increments, else branches); comments describe visible statements only.
1719 static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1720 SDOperand V1 = Op.getOperand(0);
1721 SDOperand V2 = Op.getOperand(1);
1722 SDOperand PermMask = Op.getOperand(2);
// An undef second operand means only V1's elements matter.
1724 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1726 // If we have a single element being moved from V1 to V2, this can be handled
1727 // using the C*[DX] compute mask instructions, but the vector elements have
1728 // to be monotonically increasing with one exception element.
1729 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
1730 unsigned EltsFromV2 = 0;
// V2EltIdx0 is the first mask index that refers to V2 (set per element
// width in the elided branches below).
1732 unsigned V2EltIdx0 = 0;
1733 unsigned CurrElt = 0;
1734 bool monotonic = true;
1735 if (EltVT == MVT::i8)
1737 else if (EltVT == MVT::i16)
1739 else if (EltVT == MVT::i32)
1742 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
// Scan the mask; bail out of the scan as soon as more than one element
// comes from V2 or monotonicity is broken.
1744 for (unsigned i = 0, e = PermMask.getNumOperands();
1745 EltsFromV2 <= 1 && monotonic && i != e;
1748 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1751 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1753 if (SrcElt >= V2EltIdx0) {
// Record the byte offset of the single V2 element being inserted.
1755 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1756 } else if (CurrElt != SrcElt) {
1763 if (EltsFromV2 == 1 && monotonic) {
1764 // Compute mask and shuffle
1765 MachineFunction &MF = DAG.getMachineFunction();
1766 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1767 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1768 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1769 // Initialize temporary register to 0
1770 SDOperand InitTempReg =
1771 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1772 // Copy register's contents as index in INSERT_MASK:
1773 SDOperand ShufMaskOp =
1774 DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1775 DAG.getTargetConstant(V2Elt, MVT::i32),
1776 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1777 // Use shuffle mask in SHUFB synthetic instruction:
// Note the operand order (V2, V1): the insert mask selects the single
// element out of V2 into V1's lanes.
1778 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1780 // Convert the SHUFFLE_VECTOR mask's input element units to the actual bytes.
1781 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1783 SmallVector<SDOperand, 16> ResultMask;
1784 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1786 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1789 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
// Expand each element index into its constituent byte indices.
1791 for (unsigned j = 0; j != BytesPerElement; ++j) {
1792 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1797 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1798 &ResultMask[0], ResultMask.size());
1799 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
// LowerSCALAR_TO_VECTOR - Lower SCALAR_TO_VECTOR for the SPU: a constant
// scalar becomes a full constant BUILD_VECTOR (which later folds into a
// vector-register load); any other scalar is broadcast with the
// SPUISD::PROMOTE_SCALAR synthetic node.
1803 static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1804 SDOperand Op0 = Op.getOperand(0); // Op0 = the scalar
1806 if (Op0.Val->getOpcode() == ISD::Constant) {
1807 // For a constant, build the appropriate constant vector, which will
1808 // eventually simplify to a vector register load.
1810 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1811 SmallVector<SDOperand, 16> ConstVecValues;
1815 // Create a constant vector:
1816 switch (Op.getValueType()) {
1817 default: assert(0 && "Unexpected constant value type in "
1818 "LowerSCALAR_TO_VECTOR");
1819 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1820 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1821 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
// NOTE(review): the FP vector cases reach DAG.getConstant below with an FP
// element VT even though the scalar is an ISD::Constant — confirm these
// cases are actually reachable / that getConstantFP isn't needed here.
1822 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1823 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1824 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1827 SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1828 for (size_t j = 0; j < n_copies; ++j)
1829 ConstVecValues.push_back(CValue);
1831 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1832 &ConstVecValues[0], ConstVecValues.size());
1834 // Otherwise, copy the value from one register to another:
1835 switch (Op0.getValueType()) {
1836 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
// PROMOTE_SCALAR splats the scalar across all lanes of the result vector.
1843 return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
//! Lower ISD::MUL on vector types, which SPU has no single instruction for.
/*!
 \param Op  the vector MUL node (operand 0 = rA, operand 1 = rB)
 \param DAG the SelectionDAG used to build the replacement node graph
 \return the lowered DAG node for the supported vector types

 v4i32: the full 32x32 product is assembled from partial products:
   rA*rB = MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA)
 (unsigned low-halfword product plus both cross high-halfword products).
 v8i16 and v16i8 are handled below by splitting, multiplying halves, and
 recombining with SELB select masks built via FSMBI.

 NOTE(review): this listing is missing interior lines (case labels and some
 declarations, e.g. FSMBOp/HHProd/LHProd); comments on those paths are
 necessarily hedged.
*/
1850 static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1851 switch (Op.getValueType()) {
// v4i32 path: three partial products summed together.
1853 SDOperand rA = Op.getOperand(0);
1854 SDOperand rB = Op.getOperand(1);
1855 SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1856 SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1857 SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1858 SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1860 return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1864 // Multiply two v8i16 vectors (pipeline friendly version):
1865 // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1866 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
1867 // c) Use SELB to select upper and lower halves from the intermediate results
1869 // NOTE: We really want to move the FSMBI to earlier to actually get the
1870 // dual-issue. This code does manage to do this, even if it's a little on
1873 MachineFunction &MF = DAG.getMachineFunction();
1874 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): Chain and rA are both Op.getOperand(0) — an ISD::MUL node
// has no chain operand, so "Chain" here aliases the first multiplicand.
// Confirm this is intentional (it is used as the chain for getCopyToReg).
1875 SDOperand Chain = Op.getOperand(0);
1876 SDOperand rA = Op.getOperand(0);
1877 SDOperand rB = Op.getOperand(1);
1878 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1879 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// FSMBI expands the 16-bit immediate 0xcccc into a byte select mask
// (one mask byte per immediate bit) used by SELB below.
1882 DAG.getCopyToReg(Chain, FSMBIreg,
1883 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1884 DAG.getConstant(0xcccc, MVT::i32)));
// High-halfword product pinned in a virtual register (see dual-issue note).
1887 DAG.getCopyToReg(FSMBOp, HiProdReg,
1888 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1890 SDOperand HHProd_v4i32 =
1891 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1892 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
// SELB merges the low-half product (MPY) with the shifted high-half product
// under the 0xcccc mask.
1894 return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1895 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1896 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1897 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1899 DAG.getConstant(16, MVT::i16))),
1900 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1903 // This M00sE is N@stI! (apologies to Monty Python)
1905 // SPU doesn't know how to do any 8-bit multiplication, so the solution
1906 // is to break it all apart, sign extend, and reassemble the various
1907 // intermediate products.
1909 MachineFunction &MF = DAG.getMachineFunction();
1910 MachineRegisterInfo &RegInfo = MF.getRegInfo();
// NOTE(review): same Chain/rA aliasing as the v8i16 path above.
1911 SDOperand Chain = Op.getOperand(0);
1912 SDOperand rA = Op.getOperand(0);
1913 SDOperand rB = Op.getOperand(1);
1914 SDOperand c8 = DAG.getConstant(8, MVT::i8);
1915 SDOperand c16 = DAG.getConstant(16, MVT::i8);
1917 unsigned FSMBreg_2222 = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1918 unsigned LoProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1919 unsigned HiProd_reg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
// Low-byte x low-byte products, computed as v8i16 halfword multiplies.
1922 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1923 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1924 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
// Arithmetic shift right by 8 sign-extends each high byte into a halfword.
1926 SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1928 SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1931 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1932 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
// 0x2222 -> byte select mask picking one byte out of every four (via FSMBI).
1934 SDOperand FSMBdef_2222 =
1935 DAG.getCopyToReg(Chain, FSMBreg_2222,
1936 DAG.getNode(SPUISD::FSMBI, MVT::v8i16,
1937 DAG.getConstant(0x2222, MVT::i32)));
1939 SDOperand FSMBuse_2222 =
1940 DAG.getCopyFromReg(FSMBdef_2222, FSMBreg_2222, MVT::v4i32);
1942 SDOperand LoProd_1 =
1943 DAG.getCopyToReg(Chain, LoProd_reg,
1944 DAG.getNode(SPUISD::SELB, MVT::v8i16, LLProd, LHProd,
// Keep only the low 16 bits of each 32-bit lane of the low product.
1947 SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1950 DAG.getNode(ISD::AND, MVT::v4i32,
1951 DAG.getCopyFromReg(LoProd_1, LoProd_reg, MVT::v4i32),
1952 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1953 LoProdMask, LoProdMask,
1954 LoProdMask, LoProdMask));
// Sign-extend the upper halfword of each 32-bit lane into a full word.
1957 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1958 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
1961 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
1962 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
// Halfword products of the sign-extended upper bytes.
1965 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1966 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
1967 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
1969 SDOperand HHProd_1 =
1970 DAG.getNode(SPUISD::MPY, MVT::v8i16,
1971 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1972 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
1973 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
1974 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
1977 DAG.getCopyToReg(Chain, HiProd_reg,
1978 DAG.getNode(SPUISD::SELB, MVT::v8i16,
1980 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
// Shift the high product into position before OR-merging with the low one.
1984 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1985 DAG.getCopyFromReg(HHProd, HiProd_reg, MVT::v4i32), c16);
1987 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
1988 DAG.getNode(ISD::OR, MVT::v4i32,
// Any other vector type is unsupported: report and (presumably) abort.
1993 cerr << "CellSPU: Unknown vector multiplication, got "
1994 << MVT::getValueTypeString(Op.getValueType())
//! Lower f32 (and, via vector registers, v4f32) FDIV using the SPU
//! reciprocal-estimate sequence.
/*!
 \param Op  the FDIV node (operand 0 = A, numerator; operand 1 = B, denominator)
 \param DAG the SelectionDAG to build into
 \return A/B approximated as:
   BRcpl   = FI(B, FREST(B))            -- refined reciprocal estimate of B
   AxBRcpl = A * BRcpl
   result  = AxBRcpl + BRcpl * (A - B * AxBRcpl)
 i.e. one Newton-Raphson-style correction step applied to A * (1/B).
*/
2003 static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2004 MachineFunction &MF = DAG.getMachineFunction();
2005 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2007 SDOperand A = Op.getOperand(0);
2008 SDOperand B = Op.getOperand(1);
// NOTE(review): VT is declared `unsigned` but holds an MVT::ValueType.
2009 unsigned VT = Op.getValueType();
2011 unsigned VRegBR, VRegC;
// Scalar f32 uses the FP register class; the vector case uses VECREG.
2013 if (VT == MVT::f32) {
2014 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2015 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2017 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2018 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2020 // TODO: make sure we're feeding FPInterp the right arguments
2021 // Right now: fi B, frest(B)
2024 // (Floating Interpolate (FP Reciprocal Estimate B))
2026 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2027 DAG.getNode(SPUISD::FPInterp, VT, B,
2028 DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2030 // Computes A * BRcpl and stores in a temporary register
2032 DAG.getCopyToReg(BRcpl, VRegC,
2033 DAG.getNode(ISD::FMUL, VT, A,
2034 DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2035 // What's the Chain variable do? It's magic!
2036 // TODO: set Chain = Op(0).getEntryNode()
// Final correction step: AxBRcpl + BRcpl * (A - B * AxBRcpl).
2038 return DAG.getNode(ISD::FADD, VT,
2039 DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2040 DAG.getNode(ISD::FMUL, VT,
2041 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2042 DAG.getNode(ISD::FSUB, VT, A,
2043 DAG.getNode(ISD::FMUL, VT, B,
2044 DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
//! Lower ISD::EXTRACT_VECTOR_ELT with a constant element index.
/*!
 \param Op  the EXTRACT_VECTOR_ELT node (operand 0 = vector, operand 1 = index)
 \param DAG the SelectionDAG to build into
 \return the element in the SPU "preferred slot", extracted either directly
         (EXTRACT_ELT0) or via a SHUFB that rotates the requested element
         into the preferred slot first.
 Asserts if the index operand is not a ConstantSDNode or is out of range.
*/
2047 static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2048 unsigned VT = Op.getValueType();
2049 SDOperand N = Op.getOperand(0);
2050 SDOperand Elt = Op.getOperand(1);
2051 SDOperand ShufMask[16];
2052 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2054 assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2056 int EltNo = (int) C->getValue();
// Bounds checks per element type.
2059 if (VT == MVT::i8 && EltNo >= 16)
2060 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2061 else if (VT == MVT::i16 && EltNo >= 8)
2062 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2063 else if (VT == MVT::i32 && EltNo >= 4)
// NOTE(review): message is off-by-one — the condition rejects EltNo >= 4,
// i.e. slots > 3, not > 4.
2064 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2065 else if (VT == MVT::i64 && EltNo >= 2)
// NOTE(review): message is off-by-one — the condition rejects slots > 1.
2066 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2068 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2069 // i32 and i64: Element 0 is the preferred slot
2070 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2073 // Need to generate shuffle mask and extract:
2074 int prefslot_begin = -1, prefslot_end = -1;
2075 int elt_byte = EltNo * MVT::getSizeInBits(VT) / 8;
// Preferred-slot byte ranges per type (listing is missing the case labels;
// presumably i8, i16, i32, i64 in that order).
2079 prefslot_begin = prefslot_end = 3;
2083 prefslot_begin = 2; prefslot_end = 3;
2087 prefslot_begin = 0; prefslot_end = 3;
2091 prefslot_begin = 0; prefslot_end = 7;
2096 assert(prefslot_begin != -1 && prefslot_end != -1 &&
2097 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
// Build the 16-byte SHUFB mask that moves the wanted element's bytes
// into the preferred slot; bytes past the slot just repeat the pattern.
2099 for (int i = 0; i < 16; ++i) {
2100 // zero fill uppper part of preferred slot, don't care about the
2102 unsigned int mask_val;
2104 if (i <= prefslot_end) {
2106 ((i < prefslot_begin)
2108 : elt_byte + (i - prefslot_begin));
2110 ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2112 ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2115 SDOperand ShufMaskVec =
2116 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2118 sizeof(ShufMask) / sizeof(ShufMask[0]));
2120 return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2121 DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2122 N, N, ShufMaskVec));
//! Lower ISD::INSERT_VECTOR_ELT with a constant index via SHUFB.
/*!
 \param Op  the node: operand 0 = vector, operand 1 = value, operand 2 = index
 \param DAG the SelectionDAG to build into
 \return a SHUFB that merges SCALAR_TO_VECTOR(ValOp) into VecOp under an
         INSERT_MASK control vector addressed relative to register $2.
 The index must be a ConstantSDNode (cast<> asserts otherwise).
*/
2126 static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2127 SDOperand VecOp = Op.getOperand(0);
2128 SDOperand ValOp = Op.getOperand(1);
2129 SDOperand IdxOp = Op.getOperand(2);
2130 MVT::ValueType VT = Op.getValueType();
// NOTE(review): cast<> already asserts on failure, so the CN != 0 assert
// below can never fire; dyn_cast<> would make the check meaningful.
2132 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2133 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2135 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2136 // Use $2 because it's always 16-byte aligned and it's available:
2137 SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2140 DAG.getNode(SPUISD::SHUFB, VT,
2141 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2143 DAG.getNode(SPUISD::INSERT_MASK, VT,
2144 DAG.getNode(ISD::ADD, PtrVT,
2146 DAG.getConstant(CN->getValue(),
//! Lower i8 arithmetic that SPU cannot do natively.
/*!
 \param Op  the i8 node being lowered
 \param DAG the SelectionDAG to build into
 \param Opc the opcode to apply at i16 width
 \return the operation performed at MVT::i16 and truncated back to MVT::i8.

 Each case widens the operands to i16 (sign- or zero-extended depending on
 the operation's semantics; constants are rebuilt directly at i16), applies
 Opc at i16, then truncates. NOTE(review): the case labels are missing from
 this listing, so the opcode each branch serves is inferred from its
 extension choice and is marked "presumably" below.
*/
2152 static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc) {
2153 SDOperand N0 = Op.getOperand(0); // Everything has at least one operand
2155 assert(Op.getValueType() == MVT::i8);
2158 assert(0 && "Unhandled i8 math operator");
2162 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
// Subtraction: sign-extend both operands.
2164 SDOperand N1 = Op.getOperand(1);
2165 N0 = (N0.getOpcode() != ISD::Constant
2166 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2167 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2168 N1 = (N1.getOpcode() != ISD::Constant
2169 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2170 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2171 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2172 DAG.getNode(Opc, MVT::i16, N0, N1));
// Presumably a rotate (ROTL/ROTR): the byte is replicated into both halves
// of the i16 (N0 | N0 << 8) so rotated-out bits wrap around correctly.
2176 SDOperand N1 = Op.getOperand(1);
2178 N0 = (N0.getOpcode() != ISD::Constant
2179 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2180 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2181 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2182 N1 = (N1.getOpcode() != ISD::Constant
2183 ? DAG.getNode(N1Opc, MVT::i16, N1)
2184 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2185 SDOperand ExpandArg =
2186 DAG.getNode(ISD::OR, MVT::i16, N0,
2187 DAG.getNode(ISD::SHL, MVT::i16,
2188 N0, DAG.getConstant(8, MVT::i16)));
2189 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2190 DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
// Presumably a logical shift (SRL/SHL): zero-extend the value operand.
2194 SDOperand N1 = Op.getOperand(1);
2196 N0 = (N0.getOpcode() != ISD::Constant
2197 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2198 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2199 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::ZERO_EXTEND : ISD::TRUNCATE);
2200 N1 = (N1.getOpcode() != ISD::Constant
2201 ? DAG.getNode(N1Opc, MVT::i16, N1)
2202 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2203 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2204 DAG.getNode(Opc, MVT::i16, N0, N1));
// Presumably an arithmetic operation (e.g. SRA): sign-extend the value.
2207 SDOperand N1 = Op.getOperand(1);
2209 N0 = (N0.getOpcode() != ISD::Constant
2210 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2211 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2212 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2213 N1 = (N1.getOpcode() != ISD::Constant
2214 ? DAG.getNode(N1Opc, MVT::i16, N1)
2215 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2216 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2217 DAG.getNode(Opc, MVT::i16, N0, N1));
// Presumably signed multiply: sign-extend both operands.
2220 SDOperand N1 = Op.getOperand(1);
2222 N0 = (N0.getOpcode() != ISD::Constant
2223 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2224 : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2225 N1Opc = (N1.getValueType() < MVT::i16 ? ISD::SIGN_EXTEND : ISD::TRUNCATE);
2226 N1 = (N1.getOpcode() != ISD::Constant
2227 ? DAG.getNode(N1Opc, MVT::i16, N1)
2228 : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2229 return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2230 DAG.getNode(Opc, MVT::i16, N0, N1));
2238 //! Lower byte immediate operations for v16i8 vectors:
/*!
 Recognizes (op vector, splat-const) or (op splat-const, vector) — looking
 through BIT_CONVERT on the constant side — and, when the BUILD_VECTOR is a
 uniform byte splat, rebuilds the constant as 16 target constants so the
 instruction selector can match the byte-immediate forms (ANDBI/ORBI/XORBI).
 Falls through (to code past this listing) when the pattern doesn't match.
*/
2240 LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2243 MVT::ValueType VT = Op.getValueType();
// First assume the constant is operand 0; otherwise try operand 1.
2245 ConstVec = Op.getOperand(0);
2246 Arg = Op.getOperand(1);
2247 if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2248 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2249 ConstVec = ConstVec.getOperand(0);
2251 ConstVec = Op.getOperand(1);
2252 Arg = Op.getOperand(0);
2253 if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2254 ConstVec = ConstVec.getOperand(0);
2259 if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2260 uint64_t VectorBits[2];
2261 uint64_t UndefBits[2];
2262 uint64_t SplatBits, SplatUndef;
// Only a uniform splat of the element size qualifies for the immediate form.
2265 if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2266 && isConstantSplat(VectorBits, UndefBits,
2267 MVT::getSizeInBits(MVT::getVectorElementType(VT)),
2268 SplatBits, SplatUndef, SplatSize)) {
2269 SDOperand tcVec[16];
2270 SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2271 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2273 // Turn the BUILD_VECTOR into a set of target constants:
2274 for (size_t i = 0; i < tcVecSize; ++i)
// Rebuild the original op with the target-constant vector on the RHS.
2277 return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2278 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2285 //! Lower i32 multiplication
/*!
 \param Op  the scalar MUL node (operand 0 = rA, operand 1 = rB)
 \param DAG the SelectionDAG to build into
 \param VT  the value type (only i32 is handled; others hit the error path)
 \return rA*rB assembled from SPU 16-bit partial-product instructions:
   MPYU(rA,rB) + MPYH(rA,rB) + MPYH(rB,rA)
 — same decomposition as the v4i32 case of LowerVectorMUL, scalar form.
*/
2286 static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, unsigned VT,
// Error path for unsupported types (control flow around it not visible here).
2290 cerr << "CellSPU: Unknown LowerMUL value type, got "
2291 << MVT::getValueTypeString(Op.getValueType())
2297 SDOperand rA = Op.getOperand(0);
2298 SDOperand rB = Op.getOperand(1);
2300 return DAG.getNode(ISD::ADD, MVT::i32,
2301 DAG.getNode(ISD::ADD, MVT::i32,
2302 DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2303 DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA),
2304 DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2311 //! Custom lowering for CTPOP (count population)
2313 Custom lowering code that counts the number ones in the input
2314 operand. SPU has such an instruction, but it counts the number of
2315 ones per byte, which then have to be accumulated.
/*!
 \param Op  the CTPOP node; operand 0 is the scalar being counted
 \param DAG the SelectionDAG to build into
 \return the population count, built by promoting the scalar into a vector,
         applying CNTB (per-byte counts), then summing the byte counts with
         shift/add steps sized to the element width. The switch's case
         labels (i8/i16/i32 paths, visible below in that order) are missing
         from this listing.
*/
2317 static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2318 unsigned VT = Op.getValueType();
// vecVT: a 128-bit vector whose elements match the scalar type.
2319 unsigned vecVT = MVT::getVectorType(VT, (128 / MVT::getSizeInBits(VT)));
// i8 path: one byte, so CNTB's element 0 already is the answer.
2323 SDOperand N = Op.getOperand(0);
2324 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2326 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2327 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2329 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
// i16 path: add the two byte counts (value >> 8) + value, mask to 0x0f.
2333 MachineFunction &MF = DAG.getMachineFunction();
2334 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2336 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2338 SDOperand N = Op.getOperand(0);
2339 SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2340 SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2341 SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2343 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2344 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2346 // CNTB_result becomes the chain to which all of the virtual registers
2347 // CNTB_reg, SUM1_reg become associated:
2348 SDOperand CNTB_result =
2349 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2351 SDOperand CNTB_rescopy =
2352 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2354 SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2356 return DAG.getNode(ISD::AND, MVT::i16,
2357 DAG.getNode(ISD::ADD, MVT::i16,
2358 DAG.getNode(ISD::SRL, MVT::i16,
// i32 path: two shift/add reduction steps (by 16, then by 8), mask to 0xff.
2365 MachineFunction &MF = DAG.getMachineFunction();
2366 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2368 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2369 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2371 SDOperand N = Op.getOperand(0);
2372 SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2373 SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2374 SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2375 SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2377 SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2378 SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2380 // CNTB_result becomes the chain to which all of the virtual registers
2381 // CNTB_reg, SUM1_reg become associated:
2382 SDOperand CNTB_result =
2383 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2385 SDOperand CNTB_rescopy =
2386 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
// Comp1 = counts >> 16; Sum1 = Comp1 + counts.
2389 DAG.getNode(ISD::SRL, MVT::i32,
2390 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2393 DAG.getNode(ISD::ADD, MVT::i32,
2394 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2396 SDOperand Sum1_rescopy =
2397 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
// Comp2 = Sum1 >> 8; Sum2 = Comp2 + Sum1; result = Sum2 & 0xff.
2400 DAG.getNode(ISD::SRL, MVT::i32,
2401 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2404 DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2405 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2407 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2417 /// LowerOperation - Provide custom lowering hooks for some operations.
/// Dispatches on Op.getOpcode() to the static Lower* helpers in this file.
/// The default case reports an unhandled opcode to stderr. Several case
/// labels are missing from this listing (e.g. LOAD/STORE/Constant/CALL/RET
/// and the i8-math and byte-immediate groups are identifiable only from the
/// handler they call).
2420 SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2422 switch (Op.getOpcode()) {
// Unhandled opcode: diagnostic output (presumably followed by an abort).
2424 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2425 cerr << "Op.getOpcode() = " << Op.getOpcode() << "\n";
2426 cerr << "*Op.Val:\n";
2433 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2435 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2436 case ISD::ConstantPool:
2437 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2438 case ISD::GlobalAddress:
2439 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2440 case ISD::JumpTable:
2441 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2443 return LowerConstant(Op, DAG);
2444 case ISD::ConstantFP:
2445 return LowerConstantFP(Op, DAG);
2446 case ISD::FORMAL_ARGUMENTS:
2447 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2449 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2451 return LowerRET(Op, DAG, getTargetMachine());
// i8 operations that SPU lacks get widened to i16 (see LowerI8Math).
2460 return LowerI8Math(Op, DAG, Op.getOpcode());
2462 // Vector-related lowering.
2463 case ISD::BUILD_VECTOR:
2464 return LowerBUILD_VECTOR(Op, DAG);
2465 case ISD::SCALAR_TO_VECTOR:
2466 return LowerSCALAR_TO_VECTOR(Op, DAG);
2467 case ISD::VECTOR_SHUFFLE:
2468 return LowerVECTOR_SHUFFLE(Op, DAG);
2469 case ISD::EXTRACT_VECTOR_ELT:
2470 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2471 case ISD::INSERT_VECTOR_ELT:
2472 return LowerINSERT_VECTOR_ELT(Op, DAG);
2474 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2478 return LowerByteImmed(Op, DAG);
2480 // Vector and i8 multiply:
2482 if (MVT::isVector(Op.getValueType()))
2483 return LowerVectorMUL(Op, DAG);
2484 else if (Op.getValueType() == MVT::i8)
2485 return LowerI8Math(Op, DAG, Op.getOpcode());
2487 return LowerMUL(Op, DAG, Op.getValueType(), Op.getOpcode());
// FDIV: only f32/v4f32 supported; f64 lowering is stubbed out below.
2490 if (Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::v4f32)
2491 return LowerFDIVf32(Op, DAG);
2492 // else if (Op.getValueType() == MVT::f64)
2493 // return LowerFDIVf64(Op, DAG);
2495 assert(0 && "Calling FDIV on unsupported MVT");
2498 return LowerCTPOP(Op, DAG);
2504 //===----------------------------------------------------------------------===//
2505 // Other Lowering Code
2506 //===----------------------------------------------------------------------===//
// Target hook for custom MachineInstr expansion at the end of a basic block.
// NOTE(review): the body (original lines 2511-2514) is not visible in this
// listing, so no claim is made here about what, if anything, it expands.
2509 SPUTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
2510 MachineBasicBlock *BB)
2515 //===----------------------------------------------------------------------===//
2516 // Target Optimization Hooks
2517 //===----------------------------------------------------------------------===//
// DAG-combine hook: simplifies SPU immediate shift/rotate nodes where either
// operand is the constant zero (0 << V -> 0, V << 0 -> V). The replacement
// returns themselves fall on lines missing from this listing.
2520 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2523 TargetMachine &TM = getTargetMachine();
2524 SelectionDAG &DAG = DCI.DAG;
2526 SDOperand N0 = N->getOperand(0); // everything has at least one operand
2528 switch (N->getOpcode()) {
2531 // Look for obvious optimizations for shift left:
2532 // a) Replace 0 << V with 0
2533 // b) Replace V << 0 with V
2535 // N.B: llvm will generate an undef node if the shift amount is greater than
2536 // 15 (e.g.: V << 16), which will naturally trigger an assert.
2539 case SPU::SHLQBIIvec:
2541 case SPU::ROTHIr16_i32:
2543 case SPU::ROTIr32_i16:
2544 case SPU::ROTQBYIvec:
2545 case SPU::ROTQBYBIvec:
2546 case SPU::ROTQBIIvec:
2547 case SPU::ROTHMIr16:
2549 case SPU::ROTQMBYIvec: {
2550 if (N0.getOpcode() == ISD::Constant) {
// NOTE(review): cast<> asserts rather than returning null, so this `if`
// can never take the false branch; dyn_cast<> was probably intended.
2551 if (ConstantSDNode *C = cast<ConstantSDNode>(N0)) {
2552 if (C->getValue() == 0) // 0 << V -> 0.
2556 SDOperand N1 = N->getOperand(1);
2557 if (N1.getOpcode() == ISD::Constant) {
// NOTE(review): same vacuous cast<>-in-if pattern as above.
2558 if (ConstantSDNode *C = cast<ConstantSDNode>(N1)) {
2559 if (C->getValue() == 0) // V << 0 -> V
2570 //===----------------------------------------------------------------------===//
2571 // Inline Assembly Support
2572 //===----------------------------------------------------------------------===//
2574 /// getConstraintType - Given a constraint letter, return the type of
2575 /// constraint it is for this target.
/// Single-letter constraints matched by the switch (case labels are on
/// lines missing from this listing) map to C_RegisterClass; everything
/// else defers to the TargetLowering default.
2576 SPUTargetLowering::ConstraintType
2577 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2578 if (ConstraintLetter.size() == 1) {
2579 switch (ConstraintLetter[0]) {
2586 return C_RegisterClass;
2589 return TargetLowering::getConstraintType(ConstraintLetter);
/// Map a single-letter inline-asm register constraint (plus the operand's
/// value type) to an SPU register class. Unrecognized constraints defer to
/// the TargetLowering default. The case labels are missing from this
/// listing; from the register classes returned, the branches cover 64-bit
/// and 32-bit integer, f32/f64 floating point, and a general-purpose class.
2592 std::pair<unsigned, const TargetRegisterClass*>
2593 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2594 MVT::ValueType VT) const
2596 if (Constraint.size() == 1) {
// NOTE(review): "RS6000" looks like a stale copy-paste from the PowerPC
// backend — this is the SPU constraint table.
2597 // GCC RS6000 Constraint Letters
2598 switch (Constraint[0]) {
2602 return std::make_pair(0U, SPU::R64CRegisterClass);
2603 return std::make_pair(0U, SPU::R32CRegisterClass);
2606 return std::make_pair(0U, SPU::R32FPRegisterClass);
2607 else if (VT == MVT::f64)
2608 return std::make_pair(0U, SPU::R64FPRegisterClass);
2611 return std::make_pair(0U, SPU::GPRCRegisterClass);
2615 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
/// Target hook reporting known-zero/known-one bits for SPU-specific nodes.
/// NOTE(review): the body is not visible in this listing (the lines between
/// the signature and the next function are missing), so it may simply be
/// empty/default here.
2619 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
2621 uint64_t &KnownZero,
2623 const SelectionDAG &DAG,
2624 unsigned Depth ) const {
2629 // LowerAsmOperandForConstraint
/// Lower an inline-asm operand for the given constraint letter into Ops.
/// SPU adds no target-specific handling yet; everything goes to the base
/// class implementation.
2631 SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
2632 char ConstraintLetter,
2633 std::vector<SDOperand> &Ops,
2634 SelectionDAG &DAG) {
2635 // Default, for the time being, to the base class handler
2636 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
2639 /// isLegalAddressImmediate - Return true if the integer value can be used
2640 /// as the offset of the target addressing mode.
2641 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
2642 // SPU's addresses are 256K:
// Accepts the open interval (-2^18, 2^18 - 1).
// NOTE(review): the upper bound `< (1 << 18) - 1` also excludes 2^18 - 1
// itself — confirm whether `<=` (or `< (1 << 18)`) was intended.
2643 return (V > -(1 << 18) && V < (1 << 18) - 1);
2646 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {