- const std::string& Name = F->getName();
-
- // Upgrade ARM NEON intrinsics.
- if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
- Instruction *NewI;
- Value *V0, *V1;
- if (Name.compare(14, 7, "vmovls.", 7) == 0) {
- NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
- "upgraded." + CI->getName(), CI);
- } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
- NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
- "upgraded." + CI->getName(), CI);
- } else if (Name.compare(14, 4, "vadd", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
- NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vsub", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
- NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
- } else if (Name.compare(14, 4, "vmul", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
- NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
- } else if (Name.compare(14, 4, "vmla", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
- Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
- NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
- "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vmls", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
- Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
- NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
- "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vabd", 4) == 0) {
- NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
- NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vaba", 4) == 0) {
- NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
- if (Name.at(18) == 'l')
- NewI = new ZExtInst(NewI, CI->getType(), "", CI);
- NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
- "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
- NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
- "upgraded." + CI->getName(), CI);
- } else {
- llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
- }
- // Replace any uses of the old CallInst.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- bool isLoadH = false, isLoadL = false, isMovL = false;
- bool isMovSD = false, isShufPD = false;
- bool isUnpckhPD = false, isUnpcklPD = false;
- bool isPunpckhQPD = false, isPunpcklQPD = false;
- if (F->getName() == "llvm.x86.sse2.loadh.pd")
- isLoadH = true;
- else if (F->getName() == "llvm.x86.sse2.loadl.pd")
- isLoadL = true;
- else if (F->getName() == "llvm.x86.sse2.movl.dq")
- isMovL = true;
- else if (F->getName() == "llvm.x86.sse2.movs.d")
- isMovSD = true;
- else if (F->getName() == "llvm.x86.sse2.shuf.pd")
- isShufPD = true;
- else if (F->getName() == "llvm.x86.sse2.unpckh.pd")
- isUnpckhPD = true;
- else if (F->getName() == "llvm.x86.sse2.unpckl.pd")
- isUnpcklPD = true;
- else if (F->getName() == "llvm.x86.sse2.punpckh.qdq")
- isPunpckhQPD = true;
- else if (F->getName() == "llvm.x86.sse2.punpckl.qdq")
- isPunpcklQPD = true;
-
- if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
- isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
- std::vector<Constant*> Idxs;
- Value *Op0 = CI->getArgOperand(0);
- ShuffleVectorInst *SI = NULL;
- if (isLoadH || isLoadL) {
- Value *Op1 = UndefValue::get(Op0->getType());
- Value *Addr = new BitCastInst(CI->getArgOperand(1),
- Type::getDoublePtrTy(C),
- "upgraded.", CI);
- Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
- Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0);
- Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
-
- if (isLoadH) {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- } else {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- }
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
- } else if (isMovL) {
- Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
- Idxs.push_back(Zero);
- Idxs.push_back(Zero);
- Idxs.push_back(Zero);
- Idxs.push_back(Zero);
- Value *ZeroV = ConstantVector::get(Idxs);
-
- Idxs.clear();
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
- } else if (isMovSD ||
- isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
- Value *Op1 = CI->getArgOperand(1);
- if (isMovSD) {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- } else if (isUnpckhPD || isPunpckhQPD) {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
- } else {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- }
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
- } else if (isShufPD) {
- Value *Op1 = CI->getArgOperand(1);
- unsigned MaskVal =
- cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
- ((MaskVal >> 1) & 1)+2));
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
- }
-
- assert(SI && "Unexpected!");
-
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(SI);
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.sse41.pmulld") {
- // Upgrade this set of intrinsics into vector multiplies.
- Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getName(),
- CI);
- // Fix up all the uses with our new multiply.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Mul);
-
- // Remove upgraded multiply.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
- Value *Op1 = CI->getArgOperand(0);
- Value *Op2 = CI->getArgOperand(1);
- Value *Op3 = CI->getArgOperand(2);
- unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
- Value *Rep;
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // If palignr is shifting the pair of input vectors less than 9 bytes,
- // emit a shuffle instruction.
- if (shiftVal <= 8) {
- const Type *IntTy = Type::getInt32Ty(C);
- const Type *EltTy = Type::getInt8Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 8);
-
- Op2 = Builder.CreateBitCast(Op2, VecTy);
- Op1 = Builder.CreateBitCast(Op1, VecTy);
-
- llvm::SmallVector<llvm::Constant*, 8> Indices;
- for (unsigned i = 0; i != 8; ++i)
- Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
- Value *SV = ConstantVector::get(Indices);
- Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
- Rep = Builder.CreateBitCast(Rep, F->getReturnType());
- }
-
- // If palignr is shifting the pair of input vectors more than 8 but less
- // than 16 bytes, emit a logical right shift of the destination.
- else if (shiftVal < 16) {
- // MMX has these as 1 x i64 vectors for some odd optimization reasons.
- const Type *EltTy = Type::getInt64Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 1);
-
- Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
- Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8);
-
- // create i32 constant
- Function *I =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q);
- Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
- }
-
- // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
- else {
- Rep = Constant::getNullValue(F->getReturnType());
- }
-
- // Replace any uses with our new instruction.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Rep);
-
- // Remove upgraded instruction.
- CI->eraseFromParent();
-
- } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
- Value *Op1 = CI->getArgOperand(0);
- Value *Op2 = CI->getArgOperand(1);
- Value *Op3 = CI->getArgOperand(2);
- unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
- Value *Rep;
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // If palignr is shifting the pair of input vectors less than 17 bytes,
- // emit a shuffle instruction.
- if (shiftVal <= 16) {
- const Type *IntTy = Type::getInt32Ty(C);
- const Type *EltTy = Type::getInt8Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 16);
-
- Op2 = Builder.CreateBitCast(Op2, VecTy);
- Op1 = Builder.CreateBitCast(Op1, VecTy);
-
- llvm::SmallVector<llvm::Constant*, 16> Indices;
- for (unsigned i = 0; i != 16; ++i)
- Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
- Value *SV = ConstantVector::get(Indices);
- Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
- Rep = Builder.CreateBitCast(Rep, F->getReturnType());
- }
-
- // If palignr is shifting the pair of input vectors more than 16 but less
- // than 32 bytes, emit a logical right shift of the destination.
- else if (shiftVal < 32) {
- const Type *EltTy = Type::getInt64Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 2);
- const Type *IntTy = Type::getInt32Ty(C);
-
- Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
- Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8);
-
- // create i32 constant
- Function *I =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq);
- Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
- }
-
- // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
- else {
- Rep = Constant::getNullValue(F->getReturnType());
- }
-
- // Replace any uses with our new instruction.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Rep);
-
- // Remove upgraded instruction.
- CI->eraseFromParent();
-
- } else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
- F->getName() == "llvm.x86.sse2.loadu.dq" ||
- F->getName() == "llvm.x86.sse2.loadu.pd") {
- // Convert to a native, unaligned load.
- const Type *VecTy = CI->getType();
- const Type *IntTy = IntegerType::get(C, 128);
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
- PointerType::getUnqual(IntTy),
- "cast");
- LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
- LI->setAlignment(1); // Unaligned load.
- BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
-
- // Fix up all the uses with our new load.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(BC);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
- F->getName() == "llvm.x86.sse2.movnt.dq" ||
- F->getName() == "llvm.x86.sse2.movnt.pd" ||
- F->getName() == "llvm.x86.sse2.movnt.i") {