1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
// Upgrade the declarations of the SSE4.1 functions whose arguments have
// changed their type from v4f32 to v2i64.
// Returns true (with NewFn set to the replacement declaration) when F is the
// old v4f32 form; UpgradeIntrinsicCall later rewrites the call sites.
static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
  // Check whether this is an old version of the function, which received
  // v4f32 arguments rather than the current v2i64 form.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
  // Yes, it's old, replace it with new version.
  // Rename the stale declaration out of the way so the new intrinsic
  // declaration can claim the canonical name.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
// Returns true (with NewFn set) when F still carries the old i32-mask
// signature; call sites get a trunc inserted by UpgradeIntrinsicCall.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
  // Check that the last argument is an i32.
  // The immediate mask is always the final parameter for these intrinsics.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
  // Move this function aside and map down.
  // The ".old" rename keeps existing call sites valid until they are
  // rewritten against NewFn.
  F->setName(F->getName() + ".old");
  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Detect whether F is a deprecated intrinsic declaration and, if so, set up
// its upgrade. Returns true when an upgrade is required; NewFn receives the
// replacement declaration when one exists (some upgrades rewrite only the
// call sites and leave NewFn untouched — TODO confirm against callers).
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");
  // Quickly eliminate it, if it's not a candidate.
  // Every upgradable intrinsic name starts with "llvm." and is longer than
  // 8 characters, so anything else can bail out immediately.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
  Name = Name.substr(5); // Strip off "llvm."
  // ARM NEON leading-zero count became the generic llvm.ctlz intrinsic.
  if (Name.startswith("arm.neon.vclz")) {
    F->arg_begin()->getType(),
    Type::getInt1Ty(F->getContext())
    // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
    // the end of the name. Change name from llvm.arm.neon.vclz.* to
    // llvm.ctlz.* by splicing the original type suffix (Name.substr(14))
    // onto the new prefix.
    FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm.ctlz." + Name.substr(14), F->getParent());
  // ARM NEON population count became the generic llvm.ctpop intrinsic.
  if (Name.startswith("arm.neon.vcnt")) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                      F->arg_begin()->getType());
  // One-argument ctlz/cttz are the old forms; the current intrinsics take a
  // second i1 "is_zero_undef" flag (supplied at the call site upgrade).
  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                      F->arg_begin()->getType());
  if (Name.startswith("cttz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                      F->arg_begin()->getType());
  // We only need to change the name to match the mangling including the
  // address space of the pointer argument.
  if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
    Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::objectsize, Tys);
  // All of the x86 intrinsics below are upgraded at the call site only
  // (expanded to plain IR or to other intrinsics); the declaration itself
  // needs no replacement, so NewFn is not set for them.
  if (Name.startswith("x86.sse2.pcmpeq.") ||
      Name.startswith("x86.sse2.pcmpgt.") ||
      Name.startswith("x86.avx2.pcmpeq.") ||
      Name.startswith("x86.avx2.pcmpgt.") ||
      Name.startswith("x86.avx.vpermil.") ||
      Name == "x86.avx.vinsertf128.pd.256" ||
      Name == "x86.avx.vinsertf128.ps.256" ||
      Name == "x86.avx.vinsertf128.si.256" ||
      Name == "x86.avx2.vinserti128" ||
      Name == "x86.avx.vextractf128.pd.256" ||
      Name == "x86.avx.vextractf128.ps.256" ||
      Name == "x86.avx.vextractf128.si.256" ||
      Name == "x86.avx2.vextracti128" ||
      Name == "x86.avx.movnt.dq.256" ||
      Name == "x86.avx.movnt.pd.256" ||
      Name == "x86.avx.movnt.ps.256" ||
      Name == "x86.sse42.crc32.64.8" ||
      Name == "x86.avx.vbroadcast.ss" ||
      Name == "x86.avx.vbroadcast.ss.256" ||
      Name == "x86.avx.vbroadcast.sd.256" ||
      Name == "x86.sse2.psll.dq" ||
      Name == "x86.sse2.psrl.dq" ||
      Name == "x86.avx2.psll.dq" ||
      Name == "x86.avx2.psrl.dq" ||
      Name == "x86.sse2.psll.dq.bs" ||
      Name == "x86.sse2.psrl.dq.bs" ||
      Name == "x86.avx2.psll.dq.bs" ||
      Name == "x86.avx2.psrl.dq.bs" ||
      Name == "x86.sse41.pblendw" ||
      Name == "x86.sse41.blendpd" ||
      Name == "x86.sse41.blendps" ||
      Name == "x86.avx.blend.pd.256" ||
      Name == "x86.avx.blend.ps.256" ||
      Name == "x86.avx2.pblendw" ||
      Name == "x86.avx2.pblendd.128" ||
      Name == "x86.avx2.pblendd.256" ||
      Name == "x86.avx2.vbroadcasti128" ||
      (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("x86.sse41.ptest")) {
    if (Name == "x86.sse41.ptestc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name == "x86.sse41.ptestz")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name == "x86.sse41.ptestnzc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
  // Several blend and other instructions with masks used the wrong number of
  // bits (i32 instead of i8) for the immediate operand.
  if (Name == "x86.sse41.insertps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
  if (Name == "x86.sse41.dppd")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
  if (Name == "x86.sse41.dpps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
  if (Name == "x86.sse41.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
  if (Name == "x86.avx.dp.ps.256")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
  if (Name == "x86.avx2.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
  // frcz.ss/sd may need to have an argument dropped
  // (old two-argument forms are replaced by the current one-argument ones;
  // the call-site upgrade passes only operand 1 to the new intrinsic).
  if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
  if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
  // Fix the FMA4 intrinsics to remove the 4
  // (pure rename: "llvm.x86.fma4.*" -> "llvm.x86.fma.*").
  if (Name.startswith("x86.fma4.")) {
    F->setName("llvm.x86.fma" + Name.substr(8));
  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
// Public entry point: upgrade the declaration of intrinsic F if needed.
// Returns true when F required any upgrade; the real detection work is
// delegated to UpgradeIntrinsicFunction1.
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
  bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");
  // Upgrade intrinsic attributes. This does not change the function.
  // Re-derive the attribute set from the intrinsic ID so it matches the
  // current definition even when the declaration itself was kept.
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Hook for upgrading deprecated global variables. Returns whether the
// global was modified; there are currently no upgrades to perform.
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  // Nothing to do yet.
// Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// to an equivalent byte shuffle on an i8 vector.
// Op is the (2*NumLanes x i64) input; Shift is the byte shift amount.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;
  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  SmallVector<Constant*, 32> Idxs;
  // 256-bit version is split into two 16-byte lanes.
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      // Index into the zero vector (first shuffle operand) for the low
      // Shift bytes of each lane, then into Op (second operand).
      unsigned Idx = NumElts + i - Shift;
      Idx -= NumElts - 16; // end of lane, switch operand.
      Idxs.push_back(Builder.getInt32(Idx + l));
  Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
// Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// to an equivalent byte shuffle on an i8 vector.
// Mirror image of UpgradeX86PSLLDQIntrinsics: bytes shift toward element 0
// and zeros are shuffled in from the high end of each 16-byte lane.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
                                         Value *Op, unsigned NumLanes,
  // Each lane is 16 bytes.
  unsigned NumElts = NumLanes * 16;
  // Bitcast from a 64-bit element type to a byte element type.
  Op = Builder.CreateBitCast(Op,
                             VectorType::get(Type::getInt8Ty(C), NumElts),
  // We'll be shuffling in zeroes.
  Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  SmallVector<Constant*, 32> Idxs;
  // 256-bit version is split into two 16-byte lanes.
  for (unsigned l = 0; l != NumElts; l += 16)
    for (unsigned i = 0; i != 16; ++i) {
      // Start reading Shift bytes into Op (first shuffle operand); past the
      // end of the lane, switch over to the zero vector (second operand).
      unsigned Idx = i + Shift;
      Idx += NumElts - 16; // end of lane, switch operand.
      Idxs.push_back(Builder.getInt32(Idx + l));
  Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res,
                               VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
// The first half handles intrinsics expanded to plain IR (Rep replaces CI);
// the second half dispatches on NewFn's intrinsic ID for declaration-level
// upgrades set up by UpgradeIntrinsicFunction.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI);
  assert(F && "Intrinsic call is not direct?");
  // Get the Function's name.
  StringRef Name = F->getName();
  // Upgrade packed integer vector compares intrinsics to compare instructions
  if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
      Name.startswith("llvm.x86.avx2.pcmpeq.")) {
    Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
    // need to sign extend since icmp returns vector of i1
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
             Name.startswith("llvm.x86.avx2.pcmpgt.")) {
    Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
    // need to sign extend since icmp returns vector of i1
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
             Name == "llvm.x86.avx.movnt.ps.256" ||
             Name == "llvm.x86.avx.movnt.pd.256") {
    // Non-temporal move intrinsics become an ordinary store carrying
    // !nontemporal metadata (value 1) and 16-byte alignment.
    IRBuilder<> Builder(C);
    Builder.SetInsertPoint(CI->getParent(), CI);
    Module *M = F->getParent();
    SmallVector<Metadata *, 1> Elts;
    ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Arg0,
                                      PointerType::getUnqual(Arg1->getType()),
    StoreInst *SI = Builder.CreateStore(Arg1, BC);
    SI->setMetadata(M->getMDKindID("nontemporal"), Node);
    SI->setAlignment(16);
    // The store fully replaces the void-returning intrinsic call.
    CI->eraseFromParent();
  } else if (Name.startswith("llvm.x86.xop.vpcom")) {
    // The per-condition vpcom* intrinsics collapsed into one generic
    // comparison intrinsic per element type, taking the condition as an
    // i8 immediate. First pick the intrinsic from the type suffix...
    if (Name.endswith("ub"))
      intID = Intrinsic::x86_xop_vpcomub;
    else if (Name.endswith("uw"))
      intID = Intrinsic::x86_xop_vpcomuw;
    else if (Name.endswith("ud"))
      intID = Intrinsic::x86_xop_vpcomud;
    else if (Name.endswith("uq"))
      intID = Intrinsic::x86_xop_vpcomuq;
    else if (Name.endswith("b"))
      intID = Intrinsic::x86_xop_vpcomb;
    else if (Name.endswith("w"))
      intID = Intrinsic::x86_xop_vpcomw;
    else if (Name.endswith("d"))
      intID = Intrinsic::x86_xop_vpcomd;
    else if (Name.endswith("q"))
      intID = Intrinsic::x86_xop_vpcomq;
    else
      llvm_unreachable("Unknown suffix");
    Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
    // ...then derive the immediate from the condition substring.
    if (Name.startswith("lt"))
    else if (Name.startswith("le"))
    else if (Name.startswith("gt"))
    else if (Name.startswith("ge"))
    else if (Name.startswith("eq"))
    else if (Name.startswith("ne"))
    else if (Name.startswith("false"))
    else if (Name.startswith("true"))
    else
      llvm_unreachable("Unknown condition");
    Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
    Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                               Builder.getInt8(Imm)});
  } else if (Name == "llvm.x86.sse42.crc32.64.8") {
    // The i64 variant is emulated with the i32 intrinsic: the accumulator
    // only ever carries 32 significant bits, so trunc/zext is lossless.
    Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                Intrinsic::x86_sse42_crc32_32_8);
    Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
    Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
    Rep = Builder.CreateZExt(Rep, CI->getType(), "");
  } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
    // Replace broadcasts with a series of insertelements.
    // Load one element through the pointer operand, then insert it into
    // every lane of the result vector.
    Type *VecTy = CI->getType();
    Type *EltTy = VecTy->getVectorElementType();
    unsigned EltNum = VecTy->getVectorNumElements();
    Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                        EltTy->getPointerTo());
    Value *Load = Builder.CreateLoad(EltTy, Cast);
    Type *I32Ty = Type::getInt32Ty(C);
    Rep = UndefValue::get(VecTy);
    for (unsigned I = 0; I < EltNum; ++I)
      Rep = Builder.CreateInsertElement(Rep, Load,
                                        ConstantInt::get(I32Ty, I));
  } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
    // Replace vbroadcasts with a vector shuffle.
    // Load the 128-bit source as <2 x i64> and duplicate it into both
    // halves of the 256-bit result with mask <0,1,0,1>.
    Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
    Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                          PointerType::getUnqual(VT));
    Value *Load = Builder.CreateLoad(VT, Op);
    const int Idxs[4] = { 0, 1, 0, 1 };
    Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
  } else if (Name == "llvm.x86.sse2.psll.dq") {
    // 128-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.sse2.psrl.dq") {
    // 128-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.avx2.psll.dq") {
    // 256-bit shift left specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.avx2.psrl.dq") {
    // 256-bit shift right specified in bits.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                     Shift / 8); // Shift is in bits.
  } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
    // 128-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
  } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
    // 128-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
  } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
    // 256-bit shift left specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
  } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
    // 256-bit shift right specified in bytes.
    unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
  } else if (Name == "llvm.x86.sse41.pblendw" ||
             Name == "llvm.x86.sse41.blendpd" ||
             Name == "llvm.x86.sse41.blendps" ||
             Name == "llvm.x86.avx.blend.pd.256" ||
             Name == "llvm.x86.avx.blend.ps.256" ||
             Name == "llvm.x86.avx2.pblendw" ||
             Name == "llvm.x86.avx2.pblendd.128" ||
             Name == "llvm.x86.avx2.pblendd.256") {
    // Blends become a shufflevector: bit (i % 8) of the immediate selects
    // element i from Op1 (index i + NumElts) or Op0 (index i).
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    SmallVector<Constant*, 16> Idxs;
    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
      Idxs.push_back(Builder.getInt32(Idx));
    Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
  } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
             Name == "llvm.x86.avx.vinsertf128.ps.256" ||
             Name == "llvm.x86.avx.vinsertf128.si.256" ||
             Name == "llvm.x86.avx2.vinserti128") {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    // Mask off the high bits of the immediate value; hardware ignores those.
    // Extend the second operand into a vector that is twice as big.
    Value *UndefV = UndefValue::get(Op1->getType());
    SmallVector<Constant*, 8> Idxs;
    for (unsigned i = 0; i != NumElts; ++i) {
      Idxs.push_back(Builder.getInt32(i));
    Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
    // Insert the second operand into the first operand.
    // Note that there is no guarantee that instruction lowering will actually
    // produce a vinsertf128 instruction for the created shuffles. In
    // particular, the 0 immediate case involves no lane changes, so it can
    // be handled as a blend.
    // Example of shuffle mask for 32-bit elements:
    // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
    SmallVector<Constant*, 8> Idxs2;
    // The low half of the result is either the low half of the 1st operand
    // or the low half of the 2nd operand (the inserted vector).
    for (unsigned i = 0; i != NumElts / 2; ++i) {
      unsigned Idx = Imm ? i : (i + NumElts);
      Idxs2.push_back(Builder.getInt32(Idx));
    // The high half of the result is either the low half of the 2nd operand
    // (the inserted vector) or the high half of the 1st operand.
    for (unsigned i = NumElts / 2; i != NumElts; ++i) {
      unsigned Idx = Imm ? (i + NumElts / 2) : i;
      Idxs2.push_back(Builder.getInt32(Idx));
    Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
  } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
             Name == "llvm.x86.avx.vextractf128.ps.256" ||
             Name == "llvm.x86.avx.vextractf128.si.256" ||
             Name == "llvm.x86.avx2.vextracti128") {
    Value *Op0 = CI->getArgOperand(0);
    unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    VectorType *VecTy = cast<VectorType>(CI->getType());
    unsigned NumElts = VecTy->getNumElements();
    // Mask off the high bits of the immediate value; hardware ignores those.
    // Get indexes for either the high half or low half of the input vector.
    SmallVector<Constant*, 4> Idxs(NumElts);
    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned Idx = Imm ? (i + NumElts) : i;
      Idxs[i] = Builder.getInt32(Idx);
    Value *UndefV = UndefValue::get(Op0->getType());
    Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
    // vpermil*: per-element permute within each 128-bit lane, controlled by
    // the immediate; expanded here to a single-operand shuffle.
    bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
    if (Name == "llvm.x86.avx.vpermil.pd.256")
    else if (Name == "llvm.x86.avx.vpermil.pd")
    else if (Name == "llvm.x86.avx.vpermil.ps.256")
    else if (Name == "llvm.x86.avx.vpermil.ps")
    if (PD256 || PD128 || PS256 || PS128) {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      SmallVector<Constant*, 8> Idxs;
      // pd: one selector bit per f64 element; ps: two bits per f32 element.
      // The 256-bit forms repeat per 16-byte lane with a +l lane offset.
      for (unsigned i = 0; i != 2; ++i)
        Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
      for (unsigned l = 0; l != 4; l+=2)
        for (unsigned i = 0; i != 2; ++i)
          Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
      for (unsigned i = 0; i != 4; ++i)
        Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
      for (unsigned l = 0; l != 8; l+=4)
        for (unsigned i = 0; i != 4; ++i)
          Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
      llvm_unreachable("Unexpected function");
      Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
    llvm_unreachable("Unknown function for CallInst upgrade.");
  // Rep holds the plain-IR replacement built above; splice it in.
  CI->replaceAllUsesWith(Rep);
  CI->eraseFromParent();
  // From here on, NewFn is a real replacement declaration: rename the old
  // call out of the way, then rebuild it against the new intrinsic.
  std::string Name = CI->getName();
  CI->setName(Name + ".old");
  switch (NewFn->getIntrinsicID()) {
    llvm_unreachable("Unknown function for CallInst upgrade.");
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    // The current intrinsics take a second i1 is_zero_undef flag; pass
    // false so the old defined-at-zero behavior is preserved.
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();
  case Intrinsic::objectsize:
    // Same arguments; only the mangled declaration changed.
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // Drop the obsolete first operand; only operand 1 survives.
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();
  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);
    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());
    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
// This tests each Function to determine if it needs upgrading. When we find
// one we are interested in, we then upgrade all calls to reflect the new
// function.
void llvm::UpgradeCallsToIntrinsic(Function* F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
  // Upgrade the function and check if it is a totally new function.
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all uses to the old function with the new one if necessary.
    for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
      // Advance the iterator before upgrading: UpgradeIntrinsicCall may
      // erase the call, which would invalidate an iterator pointing at it.
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);
    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
// Rewrite I's old-style (scalar) TBAA tag into the struct-path aware
// format: <base, access, offset 0[, const-flag]>.
void llvm::UpgradeInstWithTBAATag(Instruction *I) {
  MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
  assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
  // Check if the tag uses struct-path aware TBAA format.
  // (Struct-path tags carry an MDNode as operand 0 — presumably old scalar
  // tags have a string there; verify against the TBAA docs.)
  if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
  if (MD->getNumOperands() == 3) {
    // Three-operand scalar tag: the third operand is the constness flag.
    Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
    MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(Constant::getNullValue(
                             Type::getInt64Ty(I->getContext()))),
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
    // Create a MDNode <MD, MD, offset 0>
    Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
                            Type::getInt64Ty(I->getContext())))};
    I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// Upgrade a bitcast between pointers in different address spaces — no
// longer a legal BitCast — into a PtrToInt/IntToPtr pair. Temp receives
// the intermediate PtrToInt instruction (the caller must insert it before
// the returned IntToPtr).
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();
    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression counterpart of UpgradeBitCastInst: upgrade a bitcast
// constant expression between pointers in different address spaces into a
// ptrtoint/inttoptr pair.
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();
    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  unsigned Version = getDebugMetadataVersionFromModule(M);
  // Up-to-date modules need no work.
  if (Version == DEBUG_METADATA_VERSION)
  bool RetCode = StripDebugInfo(M);
  // Emit a diagnostic so the user knows why their debug info was dropped.
  DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
  M.getContext().diagnose(DiagVersion);
806 void llvm::UpgradeMDStringConstant(std::string &String) {
807 const std::string OldPrefix = "llvm.vectorizer.";
808 if (String == "llvm.vectorizer.unroll") {
809 String = "llvm.loop.interleave.count";
810 } else if (String.find(OldPrefix) == 0) {
811 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");