return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
+
+ // We can strength reduce reduce this signed add into a regular add if we
+ // can prove that it will never overflow.
+ if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ if (WillNotOverflowSignedAdd(LHS, RHS)) {
+ Value *Add = Builder->CreateNSWAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()};
+ StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ // Note that ppc_altivec_vperm has a big-endian bias, so when creating
+ // a vectorshuffle for little endian, we must undo the transformation
+ // performed on vec_perm in altivec.h. That is, we must complement
+ // the permutation mask with respect to 31 and reverse the order of
+ // V1 and V2.
if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
assert(Mask->getType()->getVectorNumElements() == 16 &&
"Bad type for intrinsic!");
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
+ if (DL && DL->isLittleEndian())
+ Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
+ Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
- Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
}
break;
}
+ case Intrinsic::AMDGPU_rcp: {
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
+ const APFloat &ArgVal = C->getValueAPF();
+ APFloat Val(ArgVal.getSemantics(), 1.0);
+ APFloat::opStatus Status = Val.divide(ArgVal,
+ APFloat::rmNearestTiesToEven);
+ // Only do this if it was exact and therefore not dependent on the
+ // rounding mode.
+ if (Status == APFloat::opOK)
+ return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
+ }
+
+ break;
+ }
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
return EraseInstFromFunction(CI);
break;
}
+ case Intrinsic::assume: {
+ // Canonicalize assume(a && b) -> assume(a); assume(b);
+ Value *IIOperand = II->getArgOperand(0), *A, *B,
+ *AssumeIntrinsic = II->getCalledValue();
+ if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
+ Builder->CreateCall(AssumeIntrinsic, A, II->getName());
+ Builder->CreateCall(AssumeIntrinsic, B, II->getName());
+ return EraseInstFromFunction(*II);
+ }
+ // assume(!(a || b)) -> assume(!a); assume(!b);
+ if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
+ Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A), II->getName());
+ Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B), II->getName());
+ return EraseInstFromFunction(*II);
+ }
+ break;
+ }
}
return visitCallSite(II);