1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
30 #include "llvm/Support/Regex.h"
34 // Upgrade the declarations of the SSE4.1 functions whose arguments have
35 // changed their type from v4f32 to v2i64.
//
//   F     - declaration to inspect; its first parameter type is examined.
//   IID   - intrinsic ID whose current declaration should replace F.
//   NewFn - receives the declaration obtained via Intrinsic::getDeclaration.
//
// NOTE(review): the tail of the signature and the return statements are not
// visible in this excerpt; callers in this file forward the bool result.
36 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
38 // Check whether this is an old version of the function, which received
// (the old form is recognised by its first parameter being a 4 x float vector)
40 Type *Arg0Type = F->getFunctionType()->getParamType(0);
41 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
44 // Yes, it's old, replace it with new version.
// Rename the old declaration with a ".old" suffix before requesting the
// declaration for IID from the same module.
45 F->setName(F->getName() + ".old");
46 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
50 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
51 // arguments have changed their type from i32 to i8.
//
//   F     - declaration to inspect; only its last parameter type is examined.
//   IID   - intrinsic ID whose current declaration should replace F.
//   NewFn - receives the declaration obtained via Intrinsic::getDeclaration.
//
// NOTE(review): the tail of the signature and the return statements are not
// visible in this excerpt; callers in this file forward the bool result.
52 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
54 // Check that the last argument is an i32.
55 Type *LastArgType = F->getFunctionType()->getParamType(
56 F->getFunctionType()->getNumParams() - 1);
57 if (!LastArgType->isIntegerTy(32))
60 // Move this function aside and map down.
// The old declaration gets a ".old" suffix before the declaration for IID is
// requested from the same module.
61 F->setName(F->getName() + ".old");
62 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Name-based detection of deprecated intrinsic declarations.
//
// The function name is inspected after its "llvm." prefix is stripped. For
// several families a replacement declaration is created or looked up and
// stored in NewFn; for others the old declaration is first renamed with a
// ".old" suffix.
//
//   F     - declaration to inspect (must be non-null, see the assert).
//   NewFn - output: replacement declaration where one is produced.
//
// NOTE(review): this excerpt is missing lines (dispatch heads, closing braces
// and return statements), so the exact return value of each branch cannot be
// confirmed from here.
66 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
67 assert(F && "Illegal to upgrade a non-existent Function.");
69 // Quickly eliminate it, if it's not a candidate.
70 StringRef Name = F->getName();
71 if (Name.size() <= 8 || !Name.startswith("llvm."))
73 Name = Name.substr(5); // Strip off "llvm."
// ARM NEON vclz: the replacement signature is built from the original first
// argument type plus an i1.
78 if (Name.startswith("arm.neon.vclz")) {
80 F->arg_begin()->getType(),
81 Type::getInt1Ty(F->getContext())
83 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
84 // the end of the name. Change name from llvm.arm.neon.vclz.* to
86 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
// substr(14) drops "arm.neon.vclz." (14 characters), keeping only the suffix
// that follows it.
87 NewFn = Function::Create(fType, F->getLinkage(),
88 "llvm.ctlz." + Name.substr(14), F->getParent());
// ARM NEON vcnt maps onto the ctpop intrinsic, overloaded on the first
// argument's type.
91 if (Name.startswith("arm.neon.vcnt")) {
92 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
93 F->arg_begin()->getType());
// ARM NEON vld1-4 / vld2-4lane whose name ends in a single ".v<type>" suffix:
// a new declaration with the same signature is created under the old name
// with ".p0i8" appended.
96 Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
97 if (vldRegex.match(Name)) {
98 auto fArgs = F->getFunctionType()->params();
99 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
100 // Can't use Intrinsic::getDeclaration here as the return types might
101 // then only be structurally equal.
102 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
103 NewFn = Function::Create(fType, F->getLinkage(),
104 "llvm." + Name + ".p0i8", F->getParent());
// ARM NEON vst1-4 / vst2-4lane with the same naming shape: the replacement is
// looked up through Intrinsic::getDeclaration, overloaded on the first two
// parameter types.
107 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
108 if (vstRegex.match(Name)) {
109 static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
110 Intrinsic::arm_neon_vst2,
111 Intrinsic::arm_neon_vst3,
112 Intrinsic::arm_neon_vst4};
114 static const Intrinsic::ID StoreLaneInts[] = {
115 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
116 Intrinsic::arm_neon_vst4lane
119 auto fArgs = F->getFunctionType()->params();
120 Type *Tys[] = {fArgs[0], fArgs[1]};
// The intrinsic is chosen by parameter count: 3..6 parameters index
// vst1..vst4, and 5..7 parameters index vst2lane..vst4lane.
121 if (Name.find("lane") == StringRef::npos)
122 NewFn = Intrinsic::getDeclaration(F->getParent(),
123 StoreInts[fArgs.size() - 3], Tys);
125 NewFn = Intrinsic::getDeclaration(F->getParent(),
126 StoreLaneInts[fArgs.size() - 5], Tys);
// One-argument ctlz/cttz declarations are renamed aside and replaced by the
// current declaration overloaded on the argument type. Note that Name no
// longer carries the "llvm." prefix when ".old" is appended here.
133 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
134 F->setName(Name + ".old");
135 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
136 F->arg_begin()->getType());
139 if (Name.startswith("cttz.") && F->arg_size() == 1) {
140 F->setName(Name + ".old");
141 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
142 F->arg_begin()->getType());
149 // We only need to change the name to match the mangling including the
// The expected name is computed from the return type and the first argument
// type; the declaration is only replaced when the current name differs.
151 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
152 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
153 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
154 F->setName(Name + ".old");
155 NewFn = Intrinsic::getDeclaration(F->getParent(),
156 Intrinsic::objectsize, Tys);
// x86 intrinsics recognised purely by name. These names match the name-based
// branches in UpgradeIntrinsicCall.
// NOTE(review): the body of this branch is not visible in this excerpt.
163 if (Name.startswith("x86.sse2.pcmpeq.") ||
164 Name.startswith("x86.sse2.pcmpgt.") ||
165 Name.startswith("x86.avx2.pcmpeq.") ||
166 Name.startswith("x86.avx2.pcmpgt.") ||
167 Name.startswith("x86.avx2.vbroadcast") ||
168 Name.startswith("x86.avx2.pbroadcast") ||
169 Name.startswith("x86.avx.vpermil.") ||
170 Name.startswith("x86.sse41.pmovsx") ||
171 Name == "x86.avx.vinsertf128.pd.256" ||
172 Name == "x86.avx.vinsertf128.ps.256" ||
173 Name == "x86.avx.vinsertf128.si.256" ||
174 Name == "x86.avx2.vinserti128" ||
175 Name == "x86.avx.vextractf128.pd.256" ||
176 Name == "x86.avx.vextractf128.ps.256" ||
177 Name == "x86.avx.vextractf128.si.256" ||
178 Name == "x86.avx2.vextracti128" ||
179 Name == "x86.avx.movnt.dq.256" ||
180 Name == "x86.avx.movnt.pd.256" ||
181 Name == "x86.avx.movnt.ps.256" ||
182 Name == "x86.sse42.crc32.64.8" ||
183 Name == "x86.avx.vbroadcast.ss" ||
184 Name == "x86.avx.vbroadcast.ss.256" ||
185 Name == "x86.avx.vbroadcast.sd.256" ||
186 Name == "x86.sse2.psll.dq" ||
187 Name == "x86.sse2.psrl.dq" ||
188 Name == "x86.avx2.psll.dq" ||
189 Name == "x86.avx2.psrl.dq" ||
190 Name == "x86.sse2.psll.dq.bs" ||
191 Name == "x86.sse2.psrl.dq.bs" ||
192 Name == "x86.avx2.psll.dq.bs" ||
193 Name == "x86.avx2.psrl.dq.bs" ||
194 Name == "x86.sse41.pblendw" ||
195 Name == "x86.sse41.blendpd" ||
196 Name == "x86.sse41.blendps" ||
197 Name == "x86.avx.blend.pd.256" ||
198 Name == "x86.avx.blend.ps.256" ||
199 Name == "x86.avx2.pblendw" ||
200 Name == "x86.avx2.pblendd.128" ||
201 Name == "x86.avx2.pblendd.256" ||
202 Name == "x86.avx2.vbroadcasti128" ||
203 Name == "x86.xop.vpcmov" ||
204 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
208 // SSE4.1 ptest functions may have an old signature.
209 if (Name.startswith("x86.sse41.ptest")) {
210 if (Name == "x86.sse41.ptestc")
211 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
212 if (Name == "x86.sse41.ptestz")
213 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
214 if (Name == "x86.sse41.ptestnzc")
215 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
217 // Several blend and other instructions with masks used the wrong number of
// (these are delegated to UpgradeX86IntrinsicsWith8BitMask, which checks for
// an i32 last parameter)
219 if (Name == "x86.sse41.insertps")
220 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
222 if (Name == "x86.sse41.dppd")
223 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
225 if (Name == "x86.sse41.dpps")
226 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
228 if (Name == "x86.sse41.mpsadbw")
229 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
231 if (Name == "x86.avx.dp.ps.256")
232 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
234 if (Name == "x86.avx2.mpsadbw")
235 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
238 // frcz.ss/sd may need to have an argument dropped
// Only the two-argument declarations are treated as old here.
239 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
240 F->setName(Name + ".old");
241 NewFn = Intrinsic::getDeclaration(F->getParent(),
242 Intrinsic::x86_xop_vfrcz_ss);
245 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
246 F->setName(Name + ".old");
247 NewFn = Intrinsic::getDeclaration(F->getParent(),
248 Intrinsic::x86_xop_vfrcz_sd);
251 // Fix the FMA4 intrinsics to remove the 4
// substr(8) skips "x86.fma4" (8 characters), so the remainder starting at the
// following '.' is appended to "llvm.x86.fma". The function is renamed in
// place; no new declaration is requested in the visible code.
252 if (Name.startswith("x86.fma4.")) {
253 F->setName("llvm.x86.fma" + Name.substr(8));
261 // This may not belong here. This function is effectively being overloaded
262 // to both detect an intrinsic which needs upgrading, and to provide the
263 // upgraded form of the intrinsic. We should perhaps have two separate
264 // functions for this.
// Public entry point: runs the name-based detection in
// UpgradeIntrinsicFunction1 and then refreshes intrinsic attributes.
//
//   F     - declaration to inspect.
//   NewFn - output: replacement declaration, if one was produced.
//
// NOTE(review): some statements of this function (including its return) are
// not visible in this excerpt.
268 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
270 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// A replacement must always be a distinct Function object.
271 assert(F != NewFn && "Intrinsic function upgraded to the same function");
273 // Upgrade intrinsic attributes. This does not change the function.
// Only applied when F has a non-zero intrinsic ID.
276 if (Intrinsic::ID id = F->getIntrinsicID())
277 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Hook for upgrading global variables; per the comment below it currently
// performs no upgrade.
// NOTE(review): the return statement is not visible in this excerpt.
281 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
282 // Nothing to do yet.
286 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
//
//   Builder  - IRBuilder used to emit the bitcasts and the shuffle.
//   C        - context used to build the i8 / i64 vector types.
//   Op       - vector operand to shift.
//   NumLanes - number of 16-byte lanes (callers in this file pass 1 or 2).
//   Shift    - per-lane byte shift amount (callers pass either the byte count
//              or the bit count divided by 8).
// Returns the result bitcast back to a vector of 2*NumLanes i64 elements.
//
// NOTE(review): the last signature line and the conditions guarding the
// shuffle are not visible in this excerpt.
288 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
289 Value *Op, unsigned NumLanes,
291 // Each lane is 16 bytes.
292 unsigned NumElts = NumLanes * 16;
294 // Bitcast from a 64-bit element type to a byte element type.
295 Op = Builder.CreateBitCast(Op,
296 VectorType::get(Type::getInt8Ty(C), NumElts),
298 // We'll be shuffling in zeroes.
299 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
301 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
302 // we'll just return the zero vector.
304 SmallVector<Constant*, 32> Idxs;
305 // 256-bit version is split into two 16-byte lanes.
// The zero vector is the first shuffle operand and Op the second, so shuffle
// indices >= NumElts select bytes of Op and smaller indices select zeroes.
306 for (unsigned l = 0; l != NumElts; l += 16)
307 for (unsigned i = 0; i != 16; ++i) {
308 unsigned Idx = NumElts + i - Shift;
310 Idx -= NumElts - 16; // end of lane, switch operand.
311 Idxs.push_back(Builder.getInt32(Idx + l));
314 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
317 // Bitcast back to a 64-bit element type.
318 return Builder.CreateBitCast(Res,
319 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
323 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
//
//   Builder  - IRBuilder used to emit the bitcasts and the shuffle.
//   C        - context used to build the i8 / i64 vector types.
//   Op       - vector operand to shift.
//   NumLanes - number of 16-byte lanes (callers in this file pass 1 or 2).
//   Shift    - per-lane byte shift amount (callers pass either the byte count
//              or the bit count divided by 8).
// Returns the result bitcast back to a vector of 2*NumLanes i64 elements.
//
// NOTE(review): the last signature line and the conditions guarding the
// shuffle are not visible in this excerpt.
325 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
326 Value *Op, unsigned NumLanes,
328 // Each lane is 16 bytes.
329 unsigned NumElts = NumLanes * 16;
331 // Bitcast from a 64-bit element type to a byte element type.
332 Op = Builder.CreateBitCast(Op,
333 VectorType::get(Type::getInt8Ty(C), NumElts),
335 // We'll be shuffling in zeroes.
336 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
338 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
339 // we'll just return the zero vector.
341 SmallVector<Constant*, 32> Idxs;
342 // 256-bit version is split into two 16-byte lanes.
// Here Op is the first shuffle operand and the zero vector the second, so
// shuffle indices < NumElts select bytes of Op and larger ones select zeroes.
343 for (unsigned l = 0; l != NumElts; l += 16)
344 for (unsigned i = 0; i != 16; ++i) {
345 unsigned Idx = i + Shift;
347 Idx += NumElts - 16; // end of lane, switch operand.
348 Idxs.push_back(Builder.getInt32(Idx + l));
351 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
354 // Bitcast back to a 64-bit element type.
355 return Builder.CreateBitCast(Res,
356 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
360 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
361 // upgraded intrinsic. All argument and return casting must be provided in
362 // order to seamlessly integrate with existing context.
//
//   CI    - the call to upgrade; it must be a direct call (see the assert).
//           The visible paths replace its uses and erase it.
//   NewFn - replacement declaration; its intrinsic ID drives the switch in
//           the second half of this function.
//
// The first half dispatches on the callee's name and builds replacement IR in
// Rep; the second half dispatches on NewFn's intrinsic ID and emits a call to
// NewFn with adjusted arguments.
// NOTE(review): this excerpt is missing lines (branch heads, closing braces,
// returns and some assignments), so the exact control flow between the two
// halves cannot be confirmed from here.
363 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
364 Function *F = CI->getCalledFunction();
365 LLVMContext &C = CI->getContext();
366 IRBuilder<> Builder(C);
// New instructions are inserted immediately before the call being upgraded.
367 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
369 assert(F && "Intrinsic call is not direct?");
372 // Get the Function's name.
373 StringRef Name = F->getName();
376 // Upgrade packed integer vector compares intrinsics to compare instructions
377 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
378 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
379 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
381 // need to sign extend since icmp returns vector of i1
382 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
383 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
384 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
385 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
387 // need to sign extend since icmp returns vector of i1
388 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
389 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
390 Name == "llvm.x86.avx.movnt.ps.256" ||
391 Name == "llvm.x86.avx.movnt.pd.256") {
// The call becomes a store carrying "nontemporal" metadata, with alignment 32.
// NOTE(review): this Builder shadows the outer Builder declared at the top of
// the function and is given the same insertion point.
392 IRBuilder<> Builder(C);
393 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
395 Module *M = F->getParent();
396 SmallVector<Metadata *, 1> Elts;
398 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
399 MDNode *Node = MDNode::get(C, Elts);
401 Value *Arg0 = CI->getArgOperand(0);
402 Value *Arg1 = CI->getArgOperand(1);
404 // Convert the type of the pointer to a pointer to the stored type.
405 Value *BC = Builder.CreateBitCast(Arg0,
406 PointerType::getUnqual(Arg1->getType()),
408 StoreInst *SI = Builder.CreateStore(Arg1, BC);
409 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
410 SI->setAlignment(32);
413 CI->eraseFromParent();
415 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
// The element-type suffix selects the vpcom intrinsic. The two-letter
// unsigned suffixes are tested first because e.g. "ub" also ends with "b".
417 if (Name.endswith("ub"))
418 intID = Intrinsic::x86_xop_vpcomub;
419 else if (Name.endswith("uw"))
420 intID = Intrinsic::x86_xop_vpcomuw;
421 else if (Name.endswith("ud"))
422 intID = Intrinsic::x86_xop_vpcomud;
423 else if (Name.endswith("uq"))
424 intID = Intrinsic::x86_xop_vpcomuq;
425 else if (Name.endswith("b"))
426 intID = Intrinsic::x86_xop_vpcomb;
427 else if (Name.endswith("w"))
428 intID = Intrinsic::x86_xop_vpcomw;
429 else if (Name.endswith("d"))
430 intID = Intrinsic::x86_xop_vpcomd;
431 else if (Name.endswith("q"))
432 intID = Intrinsic::x86_xop_vpcomq;
434 llvm_unreachable("Unknown suffix");
436 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// The condition prefix of the remaining name selects the immediate Imm passed
// to the intrinsic below.
// NOTE(review): the Imm assignments are not visible in this excerpt.
438 if (Name.startswith("lt"))
440 else if (Name.startswith("le"))
442 else if (Name.startswith("gt"))
444 else if (Name.startswith("ge"))
446 else if (Name.startswith("eq"))
448 else if (Name.startswith("ne"))
450 else if (Name.startswith("false"))
452 else if (Name.startswith("true"))
455 llvm_unreachable("Unknown condition");
457 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
459 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
460 Builder.getInt8(Imm)});
461 } else if (Name == "llvm.x86.xop.vpcmov") {
// Bitwise select expanded as (Arg0 & Sel) | (Arg1 & ~Sel); ~Sel is formed by
// xor with a splat of i64 -1.
462 Value *Arg0 = CI->getArgOperand(0);
463 Value *Arg1 = CI->getArgOperand(1);
464 Value *Sel = CI->getArgOperand(2);
465 unsigned NumElts = CI->getType()->getVectorNumElements();
466 Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
467 Value *NotSel = Builder.CreateXor(Sel, MinusOne);
468 Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
469 Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
470 Rep = Builder.CreateOr(Sel0, Sel1);
471 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
// Truncate the first operand to i32, call the crc32.32.8 intrinsic, then
// zero-extend the result to the original call's type.
472 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
473 Intrinsic::x86_sse42_crc32_32_8);
474 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
475 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
476 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
477 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
478 // Replace broadcasts with a series of insertelements.
// A single element is loaded through the pointer operand and inserted into
// every position of the result vector.
479 Type *VecTy = CI->getType();
480 Type *EltTy = VecTy->getVectorElementType();
481 unsigned EltNum = VecTy->getVectorNumElements();
482 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
483 EltTy->getPointerTo());
484 Value *Load = Builder.CreateLoad(EltTy, Cast);
485 Type *I32Ty = Type::getInt32Ty(C);
486 Rep = UndefValue::get(VecTy);
487 for (unsigned I = 0; I < EltNum; ++I)
488 Rep = Builder.CreateInsertElement(Rep, Load,
489 ConstantInt::get(I32Ty, I));
490 } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
491 VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
492 VectorType *DstTy = cast<VectorType>(CI->getType());
493 unsigned NumDstElts = DstTy->getNumElements();
495 // Extract a subvector of the first NumDstElts lanes and sign extend.
496 SmallVector<int, 8> ShuffleMask;
497 for (int i = 0; i != (int)NumDstElts; ++i)
498 ShuffleMask.push_back(i);
500 Value *SV = Builder.CreateShuffleVector(
501 CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
502 Rep = Builder.CreateSExt(SV, DstTy);
503 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
504 // Replace vbroadcasts with a vector shuffle.
// A <2 x i64> value is loaded through the pointer operand and duplicated
// with the mask {0, 1, 0, 1}.
505 Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
506 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
507 PointerType::getUnqual(VT));
508 Value *Load = Builder.CreateLoad(VT, Op);
509 const int Idxs[4] = { 0, 1, 0, 1 };
510 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
512 } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
513 Name.startswith("llvm.x86.avx2.vbroadcast")) {
514 // Replace vp?broadcasts with a vector shuffle.
// An all-zero mask selects element 0 of Op for every result element.
515 Value *Op = CI->getArgOperand(0);
516 unsigned NumElts = CI->getType()->getVectorNumElements();
517 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
518 Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
519 Constant::getNullValue(MaskTy));
520 } else if (Name == "llvm.x86.sse2.psll.dq") {
521 // 128-bit shift left specified in bits.
522 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
523 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
524 Shift / 8); // Shift is in bits.
525 } else if (Name == "llvm.x86.sse2.psrl.dq") {
526 // 128-bit shift right specified in bits.
527 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
528 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
529 Shift / 8); // Shift is in bits.
530 } else if (Name == "llvm.x86.avx2.psll.dq") {
531 // 256-bit shift left specified in bits.
532 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
533 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
534 Shift / 8); // Shift is in bits.
535 } else if (Name == "llvm.x86.avx2.psrl.dq") {
536 // 256-bit shift right specified in bits.
537 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
538 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
539 Shift / 8); // Shift is in bits.
540 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
541 // 128-bit shift left specified in bytes.
542 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
543 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
545 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
546 // 128-bit shift right specified in bytes.
547 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
548 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
550 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
551 // 256-bit shift left specified in bytes.
552 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
553 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
555 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
556 // 256-bit shift right specified in bytes.
557 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
558 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
560 } else if (Name == "llvm.x86.sse41.pblendw" ||
561 Name == "llvm.x86.sse41.blendpd" ||
562 Name == "llvm.x86.sse41.blendps" ||
563 Name == "llvm.x86.avx.blend.pd.256" ||
564 Name == "llvm.x86.avx.blend.ps.256" ||
565 Name == "llvm.x86.avx2.pblendw" ||
566 Name == "llvm.x86.avx2.pblendd.128" ||
567 Name == "llvm.x86.avx2.pblendd.256") {
// Blend expanded to a shuffle: bit (i % 8) of the immediate picks element i
// of Op1 (shuffle index i + NumElts) when set, otherwise element i of Op0.
568 Value *Op0 = CI->getArgOperand(0);
569 Value *Op1 = CI->getArgOperand(1);
570 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
571 VectorType *VecTy = cast<VectorType>(CI->getType());
572 unsigned NumElts = VecTy->getNumElements();
574 SmallVector<Constant*, 16> Idxs;
575 for (unsigned i = 0; i != NumElts; ++i) {
576 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
577 Idxs.push_back(Builder.getInt32(Idx));
580 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
581 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
582 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
583 Name == "llvm.x86.avx.vinsertf128.si.256" ||
584 Name == "llvm.x86.avx2.vinserti128") {
585 Value *Op0 = CI->getArgOperand(0);
586 Value *Op1 = CI->getArgOperand(1);
587 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
588 VectorType *VecTy = cast<VectorType>(CI->getType());
589 unsigned NumElts = VecTy->getNumElements();
591 // Mask off the high bits of the immediate value; hardware ignores those.
594 // Extend the second operand into a vector that is twice as big.
595 Value *UndefV = UndefValue::get(Op1->getType());
596 SmallVector<Constant*, 8> Idxs;
597 for (unsigned i = 0; i != NumElts; ++i) {
598 Idxs.push_back(Builder.getInt32(i));
600 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
602 // Insert the second operand into the first operand.
604 // Note that there is no guarantee that instruction lowering will actually
605 // produce a vinsertf128 instruction for the created shuffles. In
606 // particular, the 0 immediate case involves no lane changes, so it can
607 // be handled as a blend.
609 // Example of shuffle mask for 32-bit elements:
610 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
611 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
613 SmallVector<Constant*, 8> Idxs2;
614 // The low half of the result is either the low half of the 1st operand
615 // or the low half of the 2nd operand (the inserted vector).
616 for (unsigned i = 0; i != NumElts / 2; ++i) {
617 unsigned Idx = Imm ? i : (i + NumElts);
618 Idxs2.push_back(Builder.getInt32(Idx));
620 // The high half of the result is either the low half of the 2nd operand
621 // (the inserted vector) or the high half of the 1st operand.
622 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
623 unsigned Idx = Imm ? (i + NumElts / 2) : i;
624 Idxs2.push_back(Builder.getInt32(Idx));
626 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
627 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
628 Name == "llvm.x86.avx.vextractf128.ps.256" ||
629 Name == "llvm.x86.avx.vextractf128.si.256" ||
630 Name == "llvm.x86.avx2.vextracti128") {
// NumElts is the element count of the call's result type, so adding NumElts
// to an index addresses the corresponding element further up in Op0.
631 Value *Op0 = CI->getArgOperand(0);
632 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
633 VectorType *VecTy = cast<VectorType>(CI->getType());
634 unsigned NumElts = VecTy->getNumElements();
636 // Mask off the high bits of the immediate value; hardware ignores those.
639 // Get indexes for either the high half or low half of the input vector.
640 SmallVector<Constant*, 4> Idxs(NumElts);
641 for (unsigned i = 0; i != NumElts; ++i) {
642 unsigned Idx = Imm ? (i + NumElts) : i;
643 Idxs[i] = Builder.getInt32(Idx);
646 Value *UndefV = UndefValue::get(Op0->getType());
647 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// vpermil with an immediate: the exact name selects one of four variants,
// each expanded to a shuffle of Op0 with itself.
// NOTE(review): the flag assignments and the conditions choosing between the
// four index loops below are not visible in this excerpt; the loops emit 2,
// 4, 4 and 8 shuffle indices respectively, using 1-bit selectors in the first
// two and 2-bit selectors in the last two.
649 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
650 if (Name == "llvm.x86.avx.vpermil.pd.256")
652 else if (Name == "llvm.x86.avx.vpermil.pd")
654 else if (Name == "llvm.x86.avx.vpermil.ps.256")
656 else if (Name == "llvm.x86.avx.vpermil.ps")
659 if (PD256 || PD128 || PS256 || PS128) {
660 Value *Op0 = CI->getArgOperand(0);
661 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
662 SmallVector<Constant*, 8> Idxs;
665 for (unsigned i = 0; i != 2; ++i)
666 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
668 for (unsigned l = 0; l != 4; l+=2)
669 for (unsigned i = 0; i != 2; ++i)
670 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
672 for (unsigned i = 0; i != 4; ++i)
673 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
675 for (unsigned l = 0; l != 8; l+=4)
676 for (unsigned i = 0; i != 4; ++i)
677 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
679 llvm_unreachable("Unexpected function");
681 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
683 llvm_unreachable("Unknown function for CallInst upgrade.");
// Name-based replacement: redirect all uses of the call to Rep, then delete
// the call.
687 CI->replaceAllUsesWith(Rep);
688 CI->eraseFromParent();
// From here on Name is the call instruction's own value name (not the
// callee's); it is reused for some of the replacement calls below while the
// old call gets a ".old" suffix.
692 std::string Name = CI->getName();
694 CI->setName(Name + ".old");
// Dispatch on the replacement declaration's intrinsic ID.
696 switch (NewFn->getIntrinsicID()) {
698 llvm_unreachable("Unknown function for CallInst upgrade.");
700 case Intrinsic::arm_neon_vld1:
701 case Intrinsic::arm_neon_vld2:
702 case Intrinsic::arm_neon_vld3:
703 case Intrinsic::arm_neon_vld4:
704 case Intrinsic::arm_neon_vld2lane:
705 case Intrinsic::arm_neon_vld3lane:
706 case Intrinsic::arm_neon_vld4lane:
707 case Intrinsic::arm_neon_vst1:
708 case Intrinsic::arm_neon_vst2:
709 case Intrinsic::arm_neon_vst3:
710 case Intrinsic::arm_neon_vst4:
711 case Intrinsic::arm_neon_vst2lane:
712 case Intrinsic::arm_neon_vst3lane:
713 case Intrinsic::arm_neon_vst4lane: {
// Arguments are forwarded unchanged to the new declaration.
714 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
715 CI->arg_operands().end());
716 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
717 CI->eraseFromParent();
721 case Intrinsic::ctlz:
722 case Intrinsic::cttz:
// The old one-operand call gains a second operand, the constant false.
723 assert(CI->getNumArgOperands() == 1 &&
724 "Mismatch between function args and call args");
725 CI->replaceAllUsesWith(Builder.CreateCall(
726 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
727 CI->eraseFromParent();
730 case Intrinsic::objectsize:
// Same two operands, re-issued against the newly named declaration.
731 CI->replaceAllUsesWith(Builder.CreateCall(
732 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
733 CI->eraseFromParent();
736 case Intrinsic::ctpop: {
737 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
738 CI->eraseFromParent();
742 case Intrinsic::x86_xop_vfrcz_ss:
743 case Intrinsic::x86_xop_vfrcz_sd:
// Only operand 1 of the old call is passed on; operand 0 is dropped.
744 CI->replaceAllUsesWith(
745 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
746 CI->eraseFromParent();
749 case Intrinsic::x86_sse41_ptestc:
750 case Intrinsic::x86_sse41_ptestz:
751 case Intrinsic::x86_sse41_ptestnzc: {
752 // The arguments for these intrinsics used to be v4f32, and changed
753 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
754 // So, the only thing required is a bitcast for both arguments.
755 // First, check the arguments have the old type.
756 Value *Arg0 = CI->getArgOperand(0);
757 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
760 // Old intrinsic, add bitcasts
761 Value *Arg1 = CI->getArgOperand(1);
763 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
765 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
766 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
768 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
769 CI->replaceAllUsesWith(NewCall);
770 CI->eraseFromParent();
774 case Intrinsic::x86_sse41_insertps:
775 case Intrinsic::x86_sse41_dppd:
776 case Intrinsic::x86_sse41_dpps:
777 case Intrinsic::x86_sse41_mpsadbw:
778 case Intrinsic::x86_avx_dp_ps_256:
779 case Intrinsic::x86_avx2_mpsadbw: {
780 // Need to truncate the last argument from i32 to i8 -- this argument models
781 // an inherently 8-bit immediate operand to these x86 instructions.
782 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
783 CI->arg_operands().end());
785 // Replace the last argument with a trunc.
786 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
788 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
789 CI->replaceAllUsesWith(NewCall);
790 CI->eraseFromParent();
796 // This tests each Function to determine if it needs upgrading. When we find
797 // one we are interested in, we then upgrade all calls to reflect the new
//
//   F - intrinsic declaration to examine (must be non-null, see the assert).
//
// NOTE(review): some lines of this function are not visible in this excerpt,
// including the declaration of NewFn and the rest of the loop header.
799 void llvm::UpgradeCallsToIntrinsic(Function* F) {
800 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
802 // Upgrade the function and check if it is a totally new function.
804 if (UpgradeIntrinsicFunction(F, NewFn)) {
805 // Replace all uses to the old function with the new one if necessary.
// The iterator is advanced (UI++) before the call is upgraded, because
// UpgradeIntrinsicCall erases the call instruction it is given.
806 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
808 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
809 UpgradeIntrinsicCall(CI, NewFn);
811 // Remove old function, no longer used, from the module.
812 F->eraseFromParent();
// Rewrites the !tbaa metadata attached to I into a form whose first two
// operands are type nodes, followed by an i64 zero offset.
//
//   I - instruction that must already carry MD_tbaa metadata (asserted).
//
// NOTE(review): the early return for already-upgraded tags and the else
// branch head are not visible in this excerpt.
816 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
817 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
818 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
819 // Check if the tag uses struct-path aware TBAA format.
// Recognised by having an MDNode as first operand and at least 3 operands.
820 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
// A three-operand old tag: its first two operands form the new scalar type
// node.
823 if (MD->getNumOperands() == 3) {
824 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
825 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
826 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
827 Metadata *Elts2[] = {ScalarType, ScalarType,
828 ConstantAsMetadata::get(Constant::getNullValue(
829 Type::getInt64Ty(I->getContext()))),
831 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
833 // Create a MDNode <MD, MD, offset 0>
834 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
835 Type::getInt64Ty(I->getContext())))};
836 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// Replaces a bitcast between pointer (or pointer-vector) types in different
// address spaces with a ptrtoint followed by an inttoptr.
//
//   Opc    - opcode of the cast being examined; only BitCast is of interest.
//   V      - value being cast.
//   DestTy - destination type of the cast.
//   Temp   - output: receives the intermediate ptrtoint instruction.
// Returns the final inttoptr instruction when the rewrite applies.
//
// NOTE(review): the returns for the non-applicable cases are not visible in
// this excerpt.
840 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
841 Instruction *&Temp) {
842 if (Opc != Instruction::BitCast)
846 Type *SrcTy = V->getType();
847 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
848 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
849 LLVMContext &Context = V->getContext();
851 // We have no information about target data layout, so we assume that
852 // the maximum pointer size is 64bit.
853 Type *MidTy = Type::getInt64Ty(Context);
854 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
856 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression counterpart of UpgradeBitCastInst: a bitcast between
// pointer (or pointer-vector) types in different address spaces is replaced
// by inttoptr(ptrtoint(C)) through i64.
//
//   Opc    - opcode of the cast expression; only BitCast is of interest.
//   C      - constant being cast.
//   DestTy - destination type of the cast.
//
// NOTE(review): the returns for the non-applicable cases and the final
// argument of the getIntToPtr call are not visible in this excerpt.
862 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
863 if (Opc != Instruction::BitCast)
866 Type *SrcTy = C->getType();
867 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
868 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
869 LLVMContext &Context = C->getContext();
871 // We have no information about target data layout, so we assume that
872 // the maximum pointer size is 64bit.
873 Type *MidTy = Type::getInt64Ty(Context);
875 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
882 /// Check the debug info version number, if it is out-dated, drop the debug
883 /// info. Return true if module is modified.
///
/// A module whose version equals DEBUG_METADATA_VERSION is treated as current.
/// Otherwise StripDebugInfo is run and a DiagnosticInfoDebugMetadataVersion
/// carrying the found version is reported through the module's context.
/// NOTE(review): the return statements and any condition guarding the
/// diagnostic are not visible in this excerpt.
884 bool llvm::UpgradeDebugInfo(Module &M) {
885 unsigned Version = getDebugMetadataVersionFromModule(M);
886 if (Version == DEBUG_METADATA_VERSION)
889 bool RetCode = StripDebugInfo(M);
891 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
892 M.getContext().diagnose(DiagVersion);
// Rewrites old "llvm.vectorizer.*" metadata strings in place.
//
//   String - in/out: the metadata string to upgrade.
//
// "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count"; any other
// string starting with "llvm.vectorizer." has that prefix replaced by
// "llvm.loop.vectorize.". Other strings are left untouched by the visible
// code.
897 void llvm::UpgradeMDStringConstant(std::string &String) {
898 const std::string OldPrefix = "llvm.vectorizer.";
// The exact-match special case must be tested before the generic prefix
// rewrite, since it also starts with OldPrefix.
899 if (String == "llvm.vectorizer.unroll") {
900 String = "llvm.loop.interleave.count";
901 } else if (String.find(OldPrefix) == 0) {
902 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");