//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
14 #include "PPCISelLowering.h"
15 #include "PPCTargetMachine.h"
16 #include "llvm/ADT/VectorExtras.h"
17 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/SSARegMap.h"
23 #include "llvm/Constants.h"
24 #include "llvm/Function.h"
25 #include "llvm/Intrinsics.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Target/TargetOptions.h"
30 PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
31 : TargetLowering(TM) {
33 // Fold away setcc operations if possible.
34 setSetCCIsExpensive();
37 // Use _setjmp/_longjmp instead of setjmp/longjmp.
38 setUseUnderscoreSetJmpLongJmp(true);
40 // Set up the register classes.
41 addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
42 addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
43 addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
45 setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
46 setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
48 // PowerPC has no intrinsics for these particular operations
49 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
50 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
51 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
53 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
54 setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
55 setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);
57 // PowerPC has no SREM/UREM instructions
58 setOperationAction(ISD::SREM, MVT::i32, Expand);
59 setOperationAction(ISD::UREM, MVT::i32, Expand);
61 // We don't support sin/cos/sqrt/fmod
62 setOperationAction(ISD::FSIN , MVT::f64, Expand);
63 setOperationAction(ISD::FCOS , MVT::f64, Expand);
64 setOperationAction(ISD::FREM , MVT::f64, Expand);
65 setOperationAction(ISD::FSIN , MVT::f32, Expand);
66 setOperationAction(ISD::FCOS , MVT::f32, Expand);
67 setOperationAction(ISD::FREM , MVT::f32, Expand);
69 // If we're enabling GP optimizations, use hardware square root
70 if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
71 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
72 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
75 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
76 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
78 // PowerPC does not have BSWAP, CTPOP or CTTZ
79 setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
80 setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
81 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
83 // PowerPC does not have ROTR
84 setOperationAction(ISD::ROTR, MVT::i32 , Expand);
86 // PowerPC does not have Select
87 setOperationAction(ISD::SELECT, MVT::i32, Expand);
88 setOperationAction(ISD::SELECT, MVT::f32, Expand);
89 setOperationAction(ISD::SELECT, MVT::f64, Expand);
91 // PowerPC wants to turn select_cc of FP into fsel when possible.
92 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
93 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
95 // PowerPC wants to optimize integer setcc a bit
96 setOperationAction(ISD::SETCC, MVT::i32, Custom);
98 // PowerPC does not have BRCOND which requires SetCC
99 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
101 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
102 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
104 // PowerPC does not have [U|S]INT_TO_FP
105 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
106 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
108 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
109 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
111 // PowerPC does not have truncstore for i1.
112 setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);
114 // Support label based line numbers.
115 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
116 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
117 // FIXME - use subtarget debug flags
118 if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
119 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
121 // We want to legalize GlobalAddress and ConstantPool nodes into the
122 // appropriate instructions to materialize the address.
123 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
124 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
126 // RET must be custom lowered, to meet ABI requirements
127 setOperationAction(ISD::RET , MVT::Other, Custom);
129 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
130 setOperationAction(ISD::VASTART , MVT::Other, Custom);
132 // Use the default implementation.
133 setOperationAction(ISD::VAARG , MVT::Other, Expand);
134 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
135 setOperationAction(ISD::VAEND , MVT::Other, Expand);
136 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
137 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
138 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
140 // We want to custom lower some of our intrinsics.
141 setOperationAction(ISD::INTRINSIC , MVT::Other, Custom);
143 if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
144 // They also have instructions for converting between i64 and fp.
145 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
146 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
148 // FIXME: disable this lowered code. This generates 64-bit register values,
149 // and we don't model the fact that the top part is clobbered by calls. We
150 // need to flag these together so that the value isn't live across a call.
151 //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
153 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
154 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
156 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
157 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
160 if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
161 // 64 bit PowerPC implementations can support i64 types directly
162 addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
163 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
164 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
166 // 32 bit PowerPC wants to expand i64 shifts itself.
167 setOperationAction(ISD::SHL, MVT::i64, Custom);
168 setOperationAction(ISD::SRL, MVT::i64, Custom);
169 setOperationAction(ISD::SRA, MVT::i64, Custom);
172 // First set operation action for all vector types to expand. Then we
173 // will selectively turn on ones that can be effectively codegen'd.
174 for (unsigned VT = (unsigned)MVT::Vector + 1;
175 VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
176 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
177 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
178 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
179 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand);
180 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
181 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
184 if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
185 addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
186 addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
187 addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
188 addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);
190 setOperationAction(ISD::ADD , MVT::v4f32, Legal);
191 setOperationAction(ISD::SUB , MVT::v4f32, Legal);
192 setOperationAction(ISD::MUL , MVT::v4f32, Legal);
193 setOperationAction(ISD::ADD , MVT::v4i32, Legal);
195 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
196 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
198 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
199 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
201 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
202 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
205 setSetCCResultContents(ZeroOrOneSetCCResult);
206 setStackPointerRegisterToSaveRestore(PPC::R1);
208 // We have target-specific dag combine patterns for the following nodes:
209 setTargetDAGCombine(ISD::SINT_TO_FP);
210 setTargetDAGCombine(ISD::STORE);
212 computeRegisterProperties();
215 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
218 case PPCISD::FSEL: return "PPCISD::FSEL";
219 case PPCISD::FCFID: return "PPCISD::FCFID";
220 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
221 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
222 case PPCISD::STFIWX: return "PPCISD::STFIWX";
223 case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
224 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
225 case PPCISD::LVE_X: return "PPCISD::LVE_X";
226 case PPCISD::VPERM: return "PPCISD::VPERM";
227 case PPCISD::Hi: return "PPCISD::Hi";
228 case PPCISD::Lo: return "PPCISD::Lo";
229 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
230 case PPCISD::SRL: return "PPCISD::SRL";
231 case PPCISD::SRA: return "PPCISD::SRA";
232 case PPCISD::SHL: return "PPCISD::SHL";
233 case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
234 case PPCISD::STD_32: return "PPCISD::STD_32";
235 case PPCISD::CALL: return "PPCISD::CALL";
236 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
237 case PPCISD::MFCR: return "PPCISD::MFCR";
238 case PPCISD::VCMPo: return "PPCISD::VCMPo";
242 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
243 static bool isFloatingPointZero(SDOperand Op) {
244 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
245 return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
246 else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
247 // Maybe this has already been legalized into the constant pool?
248 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
249 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
250 return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
256 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
257 /// specifies a splat of a single element that is suitable for input to
258 /// VSPLTB/VSPLTH/VSPLTW.
259 bool PPC::isSplatShuffleMask(SDNode *N) {
260 assert(N->getOpcode() == ISD::BUILD_VECTOR);
262 // We can only splat 8-bit, 16-bit, and 32-bit quantities.
263 if (N->getNumOperands() != 4 && N->getNumOperands() != 8 &&
264 N->getNumOperands() != 16)
267 // This is a splat operation if each element of the permute is the same, and
268 // if the value doesn't reference the second vector.
269 SDOperand Elt = N->getOperand(0);
270 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
271 for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
272 assert(isa<ConstantSDNode>(N->getOperand(i)) &&
273 "Invalid VECTOR_SHUFFLE mask!");
274 if (N->getOperand(i) != Elt) return false;
277 // Make sure it is a splat of the first vector operand.
278 return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
281 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
282 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
283 unsigned PPC::getVSPLTImmediate(SDNode *N) {
284 assert(isSplatShuffleMask(N));
285 return cast<ConstantSDNode>(N->getOperand(0))->getValue();
288 /// isVecSplatImm - Return true if this is a build_vector of constants which
289 /// can be formed by using a vspltis[bhw] instruction. The ByteSize field
290 /// indicates the number of bytes of each element [124] -> [bhw].
291 bool PPC::isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val) {
292 SDOperand OpVal(0, 0);
293 // Check to see if this buildvec has a single non-undef value in its elements.
294 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
295 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
297 OpVal = N->getOperand(i);
298 else if (OpVal != N->getOperand(i))
302 if (OpVal.Val == 0) return false; // All UNDEF: use implicit def.
304 unsigned ValSizeInBytes;
306 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
307 Value = CN->getValue();
308 ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
309 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
310 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
311 Value = FloatToBits(CN->getValue());
315 // If the splat value is larger than the element value, then we can never do
316 // this splat. The only case that we could fit the replicated bits into our
317 // immediate field for would be zero, and we prefer to use vxor for it.
318 if (ValSizeInBytes < ByteSize) return false;
320 // If the element value is larger than the splat value, cut it in half and
321 // check to see if the two halves are equal. Continue doing this until we
322 // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
323 while (ValSizeInBytes > ByteSize) {
324 ValSizeInBytes >>= 1;
326 // If the top half equals the bottom half, we're still ok.
327 if (((Value >> (ValSizeInBytes*8)) & ((8 << ValSizeInBytes)-1)) !=
328 (Value & ((8 << ValSizeInBytes)-1)))
332 // Properly sign extend the value.
333 int ShAmt = (4-ByteSize)*8;
334 int MaskVal = ((int)Value << ShAmt) >> ShAmt;
336 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
337 if (MaskVal == 0) return false;
339 if (Val) *Val = MaskVal;
341 // Finally, if this value fits in a 5 bit sext field, return true.
342 return ((MaskVal << (32-5)) >> (32-5)) == MaskVal;
346 /// LowerOperation - Provide custom lowering hooks for some operations.
348 SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
349 switch (Op.getOpcode()) {
350 default: assert(0 && "Wasn't expecting to be able to lower this!");
351 case ISD::FP_TO_SINT: {
352 assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
353 SDOperand Src = Op.getOperand(0);
354 if (Src.getValueType() == MVT::f32)
355 Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);
358 switch (Op.getValueType()) {
359 default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
361 Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
364 Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
368 // Convert the FP value to an int value through memory.
369 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
370 if (Op.getValueType() == MVT::i32)
371 Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
374 case ISD::SINT_TO_FP:
375 if (Op.getOperand(0).getValueType() == MVT::i64) {
376 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
377 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
378 if (Op.getValueType() == MVT::f32)
379 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
382 assert(Op.getOperand(0).getValueType() == MVT::i32 &&
383 "Unhandled SINT_TO_FP type in custom expander!");
384 // Since we only generate this in 64-bit mode, we can take advantage of
385 // 64-bit registers. In particular, sign extend the input value into the
386 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
387 // then lfd it and fcfid it.
388 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
389 int FrameIdx = FrameInfo->CreateStackObject(8, 8);
390 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
392 SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
395 // STD the extended value into the stack slot.
396 SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
397 DAG.getEntryNode(), Ext64, FIdx,
398 DAG.getSrcValue(NULL));
399 // Load the value as a double.
400 SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));
402 // FCFID it and return it.
403 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
404 if (Op.getValueType() == MVT::f32)
405 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
410 case ISD::SELECT_CC: {
411 // Turn FP only select_cc's into fsel instructions.
412 if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
413 !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
416 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
418 // Cannot handle SETEQ/SETNE.
419 if (CC == ISD::SETEQ || CC == ISD::SETNE) break;
421 MVT::ValueType ResVT = Op.getValueType();
422 MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
423 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
424 SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);
426 // If the RHS of the comparison is a 0.0, we don't need to do the
427 // subtraction at all.
428 if (isFloatingPointZero(RHS))
430 default: break; // SETUO etc aren't handled by fsel.
433 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
436 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
437 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
438 return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
441 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
444 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
445 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
446 return DAG.getNode(PPCISD::FSEL, ResVT,
447 DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
452 default: break; // SETUO etc aren't handled by fsel.
455 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
456 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
457 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
458 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
461 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
462 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
463 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
464 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
467 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
468 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
469 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
470 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
473 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
474 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
475 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
476 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
481 assert(Op.getValueType() == MVT::i64 &&
482 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
483 // The generic code does a fine job expanding shift by a constant.
484 if (isa<ConstantSDNode>(Op.getOperand(1))) break;
486 // Otherwise, expand into a bunch of logical ops. Note that these ops
487 // depend on the PPC behavior for oversized shift amounts.
488 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
489 DAG.getConstant(0, MVT::i32));
490 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
491 DAG.getConstant(1, MVT::i32));
492 SDOperand Amt = Op.getOperand(1);
494 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
495 DAG.getConstant(32, MVT::i32), Amt);
496 SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
497 SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
498 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
499 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
500 DAG.getConstant(-32U, MVT::i32));
501 SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
502 SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
503 SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
504 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
507 assert(Op.getValueType() == MVT::i64 &&
508 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
509 // The generic code does a fine job expanding shift by a constant.
510 if (isa<ConstantSDNode>(Op.getOperand(1))) break;
512 // Otherwise, expand into a bunch of logical ops. Note that these ops
513 // depend on the PPC behavior for oversized shift amounts.
514 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
515 DAG.getConstant(0, MVT::i32));
516 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
517 DAG.getConstant(1, MVT::i32));
518 SDOperand Amt = Op.getOperand(1);
520 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
521 DAG.getConstant(32, MVT::i32), Amt);
522 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
523 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
524 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
525 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
526 DAG.getConstant(-32U, MVT::i32));
527 SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
528 SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
529 SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
530 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
533 assert(Op.getValueType() == MVT::i64 &&
534 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
535 // The generic code does a fine job expanding shift by a constant.
536 if (isa<ConstantSDNode>(Op.getOperand(1))) break;
538 // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
539 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
540 DAG.getConstant(0, MVT::i32));
541 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
542 DAG.getConstant(1, MVT::i32));
543 SDOperand Amt = Op.getOperand(1);
545 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
546 DAG.getConstant(32, MVT::i32), Amt);
547 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
548 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
549 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
550 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
551 DAG.getConstant(-32U, MVT::i32));
552 SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
553 SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
554 SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
555 Tmp4, Tmp6, ISD::SETLE);
556 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
558 case ISD::ConstantPool: {
559 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
560 Constant *C = CP->get();
561 SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
562 SDOperand Zero = DAG.getConstant(0, MVT::i32);
564 if (getTargetMachine().getRelocationModel() == Reloc::Static) {
565 // Generate non-pic code that has direct accesses to the constant pool.
566 // The address of the global is just (hi(&g)+lo(&g)).
567 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
568 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
569 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
572 // Only lower ConstantPool on Darwin.
573 if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
574 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
575 if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
576 // With PIC, the first instruction is actually "GR+hi(&G)".
577 Hi = DAG.getNode(ISD::ADD, MVT::i32,
578 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
581 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
582 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
585 case ISD::GlobalAddress: {
586 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
587 GlobalValue *GV = GSDN->getGlobal();
588 SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
589 SDOperand Zero = DAG.getConstant(0, MVT::i32);
591 if (getTargetMachine().getRelocationModel() == Reloc::Static) {
592 // Generate non-pic code that has direct accesses to globals.
593 // The address of the global is just (hi(&g)+lo(&g)).
594 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
595 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
596 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
599 // Only lower GlobalAddress on Darwin.
600 if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break;
602 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
603 if (getTargetMachine().getRelocationModel() == Reloc::PIC) {
604 // With PIC, the first instruction is actually "GR+hi(&G)".
605 Hi = DAG.getNode(ISD::ADD, MVT::i32,
606 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
609 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
610 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
612 if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
613 (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
616 // If the global is weak or external, we have to go through the lazy
618 return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
621 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
623 // If we're comparing for equality to zero, expose the fact that this is
624 // implented as a ctlz/srl pair on ppc, so that the dag combiner can
625 // fold the new nodes.
626 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
627 if (C->isNullValue() && CC == ISD::SETEQ) {
628 MVT::ValueType VT = Op.getOperand(0).getValueType();
629 SDOperand Zext = Op.getOperand(0);
632 Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
634 unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
635 SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
636 SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
637 DAG.getConstant(Log2b, getShiftAmountTy()));
638 return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc);
640 // Leave comparisons against 0 and -1 alone for now, since they're usually
641 // optimized. FIXME: revisit this when we can custom lower all setcc
643 if (C->isAllOnesValue() || C->isNullValue())
647 // If we have an integer seteq/setne, turn it into a compare against zero
648 // by subtracting the rhs from the lhs, which is faster than setting a
649 // condition register, reading it back out, and masking the correct bit.
650 MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
651 if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
652 MVT::ValueType VT = Op.getValueType();
653 SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
655 return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
660 // vastart just stores the address of the VarArgsFrameIndex slot into the
661 // memory location argument.
662 // FIXME: Replace MVT::i32 with PointerTy
663 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
664 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
665 Op.getOperand(1), Op.getOperand(2));
670 switch(Op.getNumOperands()) {
672 assert(0 && "Do not know how to return this many arguments!");
675 return SDOperand(); // ret void is legal
677 MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
678 unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1;
679 Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
684 Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
686 Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1));
689 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
691 case ISD::SCALAR_TO_VECTOR: {
692 // Create a stack slot that is 16-byte aligned.
693 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
694 int FrameIdx = FrameInfo->CreateStackObject(16, 16);
695 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);
697 // Store the input value into Value#0 of the stack slot.
698 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(),
699 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL));
701 return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx,
702 DAG.getSrcValue(NULL));
704 case ISD::BUILD_VECTOR:
705 // If this is a case we can't handle, return null and let the default
706 // expansion code take care of it. If we CAN select this case, return Op.
708 // See if this is all zeros.
709 // FIXME: We should handle splat(-0.0), and other cases here.
710 if (ISD::isBuildVectorAllZeros(Op.Val))
713 if (PPC::isVecSplatImm(Op.Val, 1) || // vspltisb
714 PPC::isVecSplatImm(Op.Val, 2) || // vspltish
715 PPC::isVecSplatImm(Op.Val, 4)) // vspltisw
720 case ISD::VECTOR_SHUFFLE: {
721 SDOperand V1 = Op.getOperand(0);
722 SDOperand V2 = Op.getOperand(1);
723 SDOperand PermMask = Op.getOperand(2);
725 // Cases that are handled by instructions that take permute immediates
726 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
727 // selected by the instruction selector.
728 if (PPC::isSplatShuffleMask(PermMask.Val) && V2.getOpcode() == ISD::UNDEF)
731 // TODO: Handle more cases, and also handle cases that are cheaper to do as
732 // multiple such instructions than as a constant pool load/vperm pair.
734 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
735 // vector that will get spilled to the constant pool.
736 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
738 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
739 // that it is in input element units, not in bytes. Convert now.
740 MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
741 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
743 std::vector<SDOperand> ResultMask;
744 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
745 unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
747 for (unsigned j = 0; j != BytesPerElement; ++j)
748 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
// --- Tail of the custom-lowering switch (the enclosing function begins
// earlier in the file).  First, the end of the VECTOR_SHUFFLE case: build
// the byte-select mask and emit a vperm of the two input vectors.
752 SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
753 return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
// Custom-lower intrinsic nodes.  Only the AltiVec predicate ("dot form")
// compare intrinsics are handled here; anything else returns an empty
// SDOperand so the caller uses default handling.
755 case ISD::INTRINSIC: {
// Operand 0 is a chain token iff the intrinsic has side effects, so the
// intrinsic id lives at index 0 or 1 depending on HasChain.
756 bool HasChain = Op.getOperand(0).getValueType() == MVT::Other;
757 unsigned IntNo=cast<ConstantSDNode>(Op.getOperand(HasChain))->getValue();
759 // If this is a lowered altivec predicate compare, CompareOpc is set to the
760 // opcode number of the comparison.
763 default: return SDOperand(); // Don't custom lower most intrinsics.
764 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; break;
765 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; break;
766 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; break;
767 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; break;
768 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; break;
769 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; break;
770 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; break;
771 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; break;
772 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; break;
773 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; break;
774 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; break;
775 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; break;
776 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; break;
779 assert(CompareOpc>0 && "We only lower altivec predicate compares so far!");
781 // Create the PPCISD altivec 'dot' comparison node.
// The VCMPo node produces two results: the compared vector value and a
// flag (MVT::Flag) that glues the following MFCR to this comparison.
782 std::vector<SDOperand> Ops;
783 std::vector<MVT::ValueType> VTs;
784 Ops.push_back(Op.getOperand(2)); // LHS
785 Ops.push_back(Op.getOperand(3)); // RHS
786 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
787 VTs.push_back(Op.getOperand(2).getValueType());
788 VTs.push_back(MVT::Flag);
789 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);
791 // Now that we have the comparison, emit a copy from the CR to a GPR.
792 // This is flagged to the above dot comparison.
793 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
794 DAG.getRegister(PPC::CR6, MVT::i32),
795 CompNode.getValue(1));
797 // Unpack the result based on how the target uses it.
// Operand 1 of the intrinsic node selects which CR6 bit the caller wants
// (EQ or LT) and whether the boolean should be inverted.
798 unsigned BitNo; // Bit # of CR6.
799 bool InvertBit; // Invert result?
800 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
801 default: // Can't happen, don't crash on invalid number though.
802 case 0: // Return the value of the EQ bit of CR6.
803 BitNo = 0; InvertBit = false;
805 case 1: // Return the inverted value of the EQ bit of CR6.
806 BitNo = 0; InvertBit = true;
808 case 2: // Return the value of the LT bit of CR6.
809 BitNo = 2; InvertBit = false;
811 case 3: // Return the inverted value of the LT bit of CR6.
812 BitNo = 2; InvertBit = true;
816 // Shift the bit into the low position.
817 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
818 DAG.getConstant(8-(3-BitNo), MVT::i32));
// Mask off everything but the selected bit.
820 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
821 DAG.getConstant(1, MVT::i32));
823 // If we are supposed to, toggle the bit.
825 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
826 DAG.getConstant(1, MVT::i32));
// LowerArguments - lower the incoming formal arguments of F into DAG nodes,
// one SDOperand per formal.  Convention visible in this code: the first 8
// integer words are taken from R3-R10, the first 13 FP arguments from
// F1-F13, and the memory argument area starts 24 bytes above SP (past the
// linkage area).  i64 formals are split across two GPRs (high half first).
833 std::vector<SDOperand>
834 PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
836 // add beautiful description of PPC stack frame format, or at least some docs
838 MachineFunction &MF = DAG.getMachineFunction();
839 MachineFrameInfo *MFI = MF.getFrameInfo();
840 MachineBasicBlock& BB = MF.front();
841 SSARegMap *RegMap = MF.getSSARegMap();
842 std::vector<SDOperand> ArgValues;
// Argument bookkeeping: ArgOffset tracks the stack slot of the current
// argument (starting past the 24-byte linkage area); the *_remaining
// counters track how many physical argument registers are still free.
844 unsigned ArgOffset = 24;
845 unsigned GPR_remaining = 8;
846 unsigned FPR_remaining = 13;
847 unsigned GPR_idx = 0, FPR_idx = 0;
848 static const unsigned GPR[] = {
849 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
850 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
852 static const unsigned FPR[] = {
853 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
854 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
857 // Add DAG nodes to load the arguments... On entry to a function on PPC,
858 // the arguments start at offset 24, although they are likely to be passed
860 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
861 SDOperand newroot, argt;
863 bool needsLoad = false;
// Dead arguments still consume registers/stack slots but need no copy.
864 bool ArgLive = !I->use_empty();
865 MVT::ValueType ObjectVT = getValueType(I->getType());
868 default: assert(0 && "Unhandled argument type!");
// Integer argument in a GPR: copy it in, and for sub-i32 types record the
// known sign/zero-extension with an AssertSext/AssertZext before truncating.
875 if (GPR_remaining > 0) {
876 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
877 MF.addLiveIn(GPR[GPR_idx], VReg);
878 argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
879 if (ObjectVT != MVT::i32) {
880 unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
882 argt = DAG.getNode(AssertOp, MVT::i32, argt,
883 DAG.getValueType(ObjectVT));
884 argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
// i64 argument: high half comes from the next GPR; low half comes from the
// following GPR if one remains, otherwise from its stack slot.
893 if (GPR_remaining > 0) {
894 SDOperand argHi, argLo;
895 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
896 MF.addLiveIn(GPR[GPR_idx], VReg);
897 argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
898 // If we have two or more remaining argument registers, then both halves
899 // of the i64 can be sourced from there. Otherwise, the lower half will
900 // have to come off the stack. This can happen when an i64 is preceded
901 // by 28 bytes of arguments.
902 if (GPR_remaining > 1) {
903 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
904 MF.addLiveIn(GPR[GPR_idx+1], VReg);
905 argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
907 int FI = MFI->CreateFixedObject(4, ArgOffset+4);
908 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
909 argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
910 DAG.getSrcValue(NULL));
912 // Build the outgoing arg thingy
913 argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
// FP argument: f32 and f64 occupy 4 and 8 bytes of argument space
// respectively, and live in F4RC/F8RC virtual registers when an FPR is free.
921 ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
923 if (FPR_remaining > 0) {
929 if (FPR_remaining > 0) {
931 if (ObjectVT == MVT::f32)
932 VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
934 VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
935 MF.addLiveIn(FPR[FPR_idx], VReg);
936 argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
945 // We need to load the argument to a virtual register if we determined above
946 // that we ran out of physical registers of the appropriate type
// Sub-word integers sit in the high-address end of their 4-byte slot
// (big-endian), hence the 3-/2-byte offset into the fixed stack object.
948 unsigned SubregOffset = 0;
949 if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
950 if (ObjectVT == MVT::i16) SubregOffset = 2;
951 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
952 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
953 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
954 DAG.getConstant(SubregOffset, MVT::i32));
955 argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
956 DAG.getSrcValue(NULL));
959 // Every 4 bytes of argument space consumes one of the GPRs available for
// GPRs shadow the argument area even for FP args: an 8-byte object burns
// two GPRs when two remain, otherwise one.
961 if (GPR_remaining > 0) {
962 unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
963 GPR_remaining -= delta;
966 ArgOffset += ObjSize;
// Thread the chain through each copy/load so argument reads stay ordered.
968 DAG.setRoot(newroot.getValue(1));
970 ArgValues.push_back(argt);
973 // If the function takes variable number of arguments, make a frame index for
974 // the start of the first vararg value... for expansion of llvm.va_start.
976 VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
977 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
978 // If this function is vararg, store any remaining integer argument regs
979 // to their spots on the stack so that they may be loaded by deferencing the
980 // result of va_next.
981 std::vector<SDOperand> MemOps;
982 for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
983 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
984 MF.addLiveIn(GPR[GPR_idx], VReg);
985 SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
986 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
987 Val, FIN, DAG.getSrcValue(NULL));
988 MemOps.push_back(Store);
989 // Increment the address by four for the next argument to store
990 SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
991 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
// Join all the vararg spill stores (plus the old root) into one token.
993 if (!MemOps.empty()) {
994 MemOps.push_back(DAG.getRoot());
995 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
999 // Finally, inform the code generator which regs we return values in.
// Register live-outs recorded here: R3 (and R4 for the second word of a
// two-word result) for integers, F1 for floating point.
1000 switch (getValueType(F.getReturnType())) {
1001 default: assert(0 && "Unknown type!");
1002 case MVT::isVoid: break;
1007 MF.addLiveOut(PPC::R3);
1010 MF.addLiveOut(PPC::R3);
1011 MF.addLiveOut(PPC::R4);
1015 MF.addLiveOut(PPC::F1);
// LowerCallTo - lower an outgoing call: marshal Args into registers/stack
// slots mirroring the convention in LowerArguments, emit the PPCISD::CALL
// node, and unpack the result.  Returns the (return value, chain) pair.
1022 std::pair<SDOperand, SDOperand>
1023 PPCTargetLowering::LowerCallTo(SDOperand Chain,
1024 const Type *RetTy, bool isVarArg,
1025 unsigned CallingConv, bool isTailCall,
1026 SDOperand Callee, ArgListTy &Args,
1027 SelectionDAG &DAG) {
1028 // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
1029 // SelectExpr to use to put the arguments in the appropriate registers.
1030 std::vector<SDOperand> args_to_use;
1032 // Count how many bytes are to be pushed on the stack, including the linkage
1033 // area, and parameter passing area.
1034 unsigned NumBytes = 24;
// NOTE(review): getCALLSEQ_START appears both here and again below; the
// surrounding control flow is not visible at this point in the file —
// presumably this emission covers the no-argument fast path.  Confirm that
// exactly one CALLSEQ_START is emitted per call path.
1037 Chain = DAG.getCALLSEQ_START(Chain,
1038 DAG.getConstant(NumBytes, getPointerTy()));
// First pass: accumulate the total byte size of the argument area.
1040 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1041 switch (getValueType(Args[i].second)) {
1042 default: assert(0 && "Unknown value type!");
1057 // Just to be safe, we'll always reserve the full 24 bytes of linkage area
1058 // plus 32 bytes of argument space in case any called code gets funky on us.
1059 // (Required by ABI to support var arg)
1060 if (NumBytes < 56) NumBytes = 56;
1062 // Adjust the stack pointer for the new arguments...
1063 // These operations are automatically eliminated by the prolog/epilog pass
1064 Chain = DAG.getCALLSEQ_START(Chain,
1065 DAG.getConstant(NumBytes, getPointerTy()));
1067 // Set up a copy of the stack pointer for use loading and storing any
1068 // arguments that may not fit in the registers available for argument
// R1 is the PPC stack pointer; PtrOff computations below are R1-relative.
1070 SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
1072 // Figure out which arguments are going to go in registers, and which in
1073 // memory. Also, if this is a vararg function, floating point operations
1074 // must be stored to our stack, and loaded into integer regs as well, if
1075 // any integer regs are available for argument passing.
1076 unsigned ArgOffset = 24;
1077 unsigned GPR_remaining = 8;
1078 unsigned FPR_remaining = 13;
// Second pass: place each argument in a register or record a stack store.
1080 std::vector<SDOperand> MemOps;
1081 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1082 // PtrOff will be used to store the current argument to the stack if a
1083 // register cannot be found for it.
1084 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1085 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
1086 MVT::ValueType ArgVT = getValueType(Args[i].second);
1089 default: assert(0 && "Unexpected ValueType for argument!");
1093 // Promote the integer to 32 bits. If the input type is signed use a
1094 // sign extend, otherwise use a zero extend.
1095 if (Args[i].second->isSigned())
1096 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
1098 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
// Integer word: pass in the next GPR if one remains, else store it.
1101 if (GPR_remaining > 0) {
1102 args_to_use.push_back(Args[i].first);
1105 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1106 Args[i].first, PtrOff,
1107 DAG.getSrcValue(NULL)));
1112 // If we have one free GPR left, we can place the upper half of the i64
1113 // in it, and store the other half to the stack. If we have two or more
1114 // free GPRs, then we can pass both halves of the i64 in registers.
1115 if (GPR_remaining > 0) {
// EXTRACT_ELEMENT with index 1 yields the high 32 bits, index 0 the low.
1116 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1117 Args[i].first, DAG.getConstant(1, MVT::i32));
1118 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1119 Args[i].first, DAG.getConstant(0, MVT::i32));
1120 args_to_use.push_back(Hi);
1122 if (GPR_remaining > 0) {
1123 args_to_use.push_back(Lo);
// Low half spills to the second word of the argument slot (offset +4).
1126 SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
1127 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
1128 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1129 Lo, PtrOff, DAG.getSrcValue(NULL)));
1132 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1133 Args[i].first, PtrOff,
1134 DAG.getSrcValue(NULL)));
// FP argument with a free FPR: pass it in the register ...
1140 if (FPR_remaining > 0) {
1141 args_to_use.push_back(Args[i].first);
// ... but for varargs also store it and reload the bytes into GPRs, since
// a vararg callee reads unprototyped args from the integer registers.
1144 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
1145 Args[i].first, PtrOff,
1146 DAG.getSrcValue(NULL));
1147 MemOps.push_back(Store);
1148 // Float varargs are always shadowed in available integer registers
1149 if (GPR_remaining > 0) {
1150 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
1151 DAG.getSrcValue(NULL));
1152 MemOps.push_back(Load.getValue(1));
1153 args_to_use.push_back(Load);
// f64 shadows a second GPR with its low word (slot offset +4).
1156 if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
1157 SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
1158 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
1159 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
1160 DAG.getSrcValue(NULL));
1161 MemOps.push_back(Load.getValue(1));
1162 args_to_use.push_back(Load);
1166 // If we have any FPRs remaining, we may also have GPRs remaining.
1167 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
// Non-vararg case: the shadowed GPRs carry no data, so mark them UNDEF to
// keep the register/operand accounting consistent.
1169 if (GPR_remaining > 0) {
1170 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
1173 if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
1174 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
// No FPR left: the FP argument goes entirely on the stack.
1179 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1180 Args[i].first, PtrOff,
1181 DAG.getSrcValue(NULL)));
1183 ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
// Merge all outgoing-argument stores into a single chain token.
1187 if (!MemOps.empty())
1188 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
// Build the result-type list of the CALL node: value type(s) first (two
// i32s for an i64 result, the promoted type otherwise), then the chain.
1191 std::vector<MVT::ValueType> RetVals;
1192 MVT::ValueType RetTyVT = getValueType(RetTy);
1193 MVT::ValueType ActualRetTyVT = RetTyVT;
1194 if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
1195 ActualRetTyVT = MVT::i32; // Promote result to i32.
1197 if (RetTyVT == MVT::i64) {
1198 RetVals.push_back(MVT::i32);
1199 RetVals.push_back(MVT::i32);
1200 } else if (RetTyVT != MVT::isVoid) {
1201 RetVals.push_back(ActualRetTyVT);
1203 RetVals.push_back(MVT::Other);
1205 // If the callee is a GlobalAddress node (quite common, every direct call is)
1206 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1207 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1208 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
1210 std::vector<SDOperand> Ops;
1211 Ops.push_back(Chain);
1212 Ops.push_back(Callee);
1213 Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
1214 SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
// The chain is always the CALL node's last result.
1215 Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
1216 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
1217 DAG.getConstant(NumBytes, getPointerTy()));
1218 SDOperand RetVal = TheCall;
1220 // If the result is a small value, add a note so that we keep track of the
1221 // information about whether it is sign or zero extended.
1222 if (RetTyVT != ActualRetTyVT) {
1223 RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
1224 MVT::i32, RetVal, DAG.getValueType(RetTyVT));
1225 RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
1226 } else if (RetTyVT == MVT::i64) {
// Reassemble the two i32 results into one i64 value.
1227 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
1230 return std::make_pair(RetVal, Chain);
// InsertAtEndOfBasicBlock - expand a SELECT_CC_* pseudo instruction into an
// explicit branch diamond at the end of BB.  Operands of MI: 0 = dest vreg,
// 1 = condition register, 2 = true value, 3 = false value, 4 = branch opcode.
1234 PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
1235 MachineBasicBlock *BB) {
1236 assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
1237 MI->getOpcode() == PPC::SELECT_CC_F4 ||
1238 MI->getOpcode() == PPC::SELECT_CC_F8) &&
1239 "Unexpected instr type to insert");
1241 // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
1242 // control-flow pattern. The incoming instruction knows the destination vreg
1243 // to set, the condition code register to branch on, the true/false values to
1244 // select between, and a branch opcode to use.
1245 const BasicBlock *LLVM_BB = BB->getBasicBlock();
// Insertion point for the new blocks: immediately after BB in the function.
1246 ilist<MachineBasicBlock>::iterator It = BB;
1252 // cmpTY ccX, r1, r2
1254 // fallthrough --> copy0MBB
// Diamond shape: thisMBB branches to sinkMBB when the condition holds,
// otherwise falls through to copy0MBB (the false-value path), which in turn
// falls through to sinkMBB where a PHI merges the two values.
1255 MachineBasicBlock *thisMBB = BB;
1256 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
1257 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
// Emit the conditional branch using the opcode carried in operand 4.
1258 BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
1259 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
1260 MachineFunction *F = BB->getParent();
1261 F->getBasicBlockList().insert(It, copy0MBB);
1262 F->getBasicBlockList().insert(It, sinkMBB);
1263 // Update machine-CFG edges by first adding all successors of the current
1264 // block to the new block which will contain the Phi node for the select.
1265 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
1266 e = BB->succ_end(); i != e; ++i)
1267 sinkMBB->addSuccessor(*i);
1268 // Next, remove all successors of the current block, and add the true
1269 // and fallthrough blocks as its successors.
1270 while(!BB->succ_empty())
1271 BB->removeSuccessor(BB->succ_begin());
1272 BB->addSuccessor(copy0MBB);
1273 BB->addSuccessor(sinkMBB);
1276 // %FalseValue = ...
1277 // # fallthrough to sinkMBB
1280 // Update machine-CFG edges
1281 BB->addSuccessor(sinkMBB);
1284 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// The PHI selects operand 3 (false value) when control arrived via
// copy0MBB, and operand 2 (true value) when the branch was taken.
1287 BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
1288 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
1289 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
1291 delete MI; // The pseudo instruction is gone now.
// PerformDAGCombine - target-specific DAG combines.  The cases visible here
// rewrite fp<->int round trips to stay in FP registers: (sint_to_fp
// (fp_to_sint X)) -> fcfid(fctidz(X)) on 64-bit subtargets, and
// store(fp_to_sint F) -> stfiwx(fctiwz(F)) when STFIWX is available.
1295 SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
1296 DAGCombinerInfo &DCI) const {
1297 TargetMachine &TM = getTargetMachine();
1298 SelectionDAG &DAG = DCI.DAG;
1299 switch (N->getOpcode()) {
1301 case ISD::SINT_TO_FP:
// fctidz/fcfid only exist on 64-bit-capable implementations.
1302 if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
1303 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
1304 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
1305 // We allow the src/dst to be either f32/f64, but the intermediate
1306 // type must be i64.
1307 if (N->getOperand(0).getValueType() == MVT::i64) {
1308 SDOperand Val = N->getOperand(0).getOperand(0);
// Widen an f32 source to f64 first; the conversion nodes are f64-only.
1309 if (Val.getValueType() == MVT::f32) {
1310 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1311 DCI.AddToWorklist(Val.Val);
1314 Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
1315 DCI.AddToWorklist(Val.Val);
1316 Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
1317 DCI.AddToWorklist(Val.Val);
// Round back down if the original node produced f32.
1318 if (N->getValueType(0) == MVT::f32) {
1319 Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
1320 DCI.AddToWorklist(Val.Val);
1323 } else if (N->getOperand(0).getValueType() == MVT::i32) {
1324 // If the intermediate type is i32, we can avoid the load/store here
1331 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
1332 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
1333 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
1334 N->getOperand(1).getValueType() == MVT::i32) {
1335 SDOperand Val = N->getOperand(1).getOperand(0);
1336 if (Val.getValueType() == MVT::f32) {
1337 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
1338 DCI.AddToWorklist(Val.Val);
1340 Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
1341 DCI.AddToWorklist(Val.Val);
// STFIWX takes (chain, value, ptr, srcvalue) from the original store.
1343 Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
1344 N->getOperand(2), N->getOperand(3));
1345 DCI.AddToWorklist(Val.Val);
1354 /// getConstraintType - Given a constraint letter, return the type of
1355 /// constraint it is for this target.
// Letters recognized in the switch map to C_RegisterClass; anything else is
// delegated to the generic TargetLowering implementation.
1356 PPCTargetLowering::ConstraintType
1357 PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
1358 switch (ConstraintLetter) {
1365 return C_RegisterClass;
1367 return TargetLowering::getConstraintType(ConstraintLetter);
// getRegClassForInlineAsmConstraint - map a single-letter GCC RS6000 inline
// asm constraint to the list of allocatable physical registers it denotes.
// Unknown constraints fall through to an empty vector.
1371 std::vector<unsigned> PPCTargetLowering::
1372 getRegClassForInlineAsmConstraint(const std::string &Constraint,
1373 MVT::ValueType VT) const {
1374 if (Constraint.size() == 1) {
1375 switch (Constraint[0]) { // GCC RS6000 Constraint Letters
1376 default: break; // Unknown constriant letter
// GPRs excluding R0 — presumably the 'b' (base register) constraint, where
// R0 would be read as literal zero; case label not visible here — confirm.
1378 return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
1379 PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1380 PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1381 PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1382 PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1383 PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1384 PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1385 PPC::R28, PPC::R29, PPC::R30, PPC::R31,
// Full general-purpose register file, R0-R31.
1388 return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
1389 PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
1390 PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
1391 PPC::R12, PPC::R13, PPC::R14, PPC::R15,
1392 PPC::R16, PPC::R17, PPC::R18, PPC::R19,
1393 PPC::R20, PPC::R21, PPC::R22, PPC::R23,
1394 PPC::R24, PPC::R25, PPC::R26, PPC::R27,
1395 PPC::R28, PPC::R29, PPC::R30, PPC::R31,
// Floating-point registers F0-F31.
1398 return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
1399 PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
1400 PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
1401 PPC::F12, PPC::F13, PPC::F14, PPC::F15,
1402 PPC::F16, PPC::F17, PPC::F18, PPC::F19,
1403 PPC::F20, PPC::F21, PPC::F22, PPC::F23,
1404 PPC::F24, PPC::F25, PPC::F26, PPC::F27,
1405 PPC::F28, PPC::F29, PPC::F30, PPC::F31,
// AltiVec vector registers V0-V31.
1408 return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
1409 PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
1410 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
1411 PPC::V12, PPC::V13, PPC::V14, PPC::V15,
1412 PPC::V16, PPC::V17, PPC::V18, PPC::V19,
1413 PPC::V20, PPC::V21, PPC::V22, PPC::V23,
1414 PPC::V24, PPC::V25, PPC::V26, PPC::V27,
1415 PPC::V28, PPC::V29, PPC::V30, PPC::V31,
// Condition register fields CR0-CR7.
1418 return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
1419 PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
// No match: return an empty list so the caller uses default handling.
1424 return std::vector<unsigned>();
1427 // isOperandValidForConstraint
// Validate an immediate operand against a GCC RS6000 immediate-constraint
// letter (I/J/K/L/M/N/O/P).  Non-constant operands are rejected outright;
// unhandled letters are delegated to the generic implementation.
1428 bool PPCTargetLowering::
1429 isOperandValidForConstraint(SDOperand Op, char Letter) {
1440 if (!isa<ConstantSDNode>(Op)) return false; // Must be an immediate.
1441 unsigned Value = cast<ConstantSDNode>(Op)->getValue();
1443 default: assert(0 && "Unknown constraint letter!");
1444 case 'I': // "I" is a signed 16-bit constant.
1445 return (short)Value == (int)Value;
1446 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
1447 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
// 'J' and 'L' share one test: both require the low 16 bits to be zero.
1448 return (short)Value == 0;
1449 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
1450 return (Value >> 16) == 0;
1451 case 'M': // "M" is a constant that is greater than 31.
1453 case 'N': // "N" is a positive constant that is an exact power of two.
1454 return (int)Value > 0 && isPowerOf2_32(Value);
1455 case 'O': // "O" is the constant zero.
1457 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
1458 return (short)-Value == (int)-Value;
1464 // Handle standard constraint letters.
1465 return TargetLowering::isOperandValidForConstraint(Op, Letter);
1468 /// isLegalAddressImmediate - Return true if the integer value can be used
1469 /// as the offset of the target addressing mode.
1470 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
1471 // PPC allows a sign-extended 16-bit immediate field.
// D-form loads/stores carry a signed 16-bit displacement, i.e. the legal
// range is [-32768, 32767].  The previous check,
//   (V > -(1 << 16) && V < (1 << 16)-1),
// accepted the far wider open interval (-65536, 65535) and so wrongly
// classified offsets such as 40000 as legal.  Comparing V against its own
// sign-extended low 16 bits implements the stated contract exactly.
1472 return V == (int64_t)(short)V;