1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
37 // Check whether this is an old version of the function, which received
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43 // Yes, it's old, replace it with new version.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
53 // Check that the last argument is an i32.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Decide whether F is a deprecated intrinsic that needs auto-upgrading.
// Returns true when call sites must be rewritten via UpgradeIntrinsicCall.
// NewFn is set to a replacement declaration when one exists; for intrinsics
// that are lowered directly to plain IR it is left as passed in (null).
// NOTE(review): this listing appears to be missing lines (returns, braces
// and the name-dispatch skeleton) -- annotations below describe only what
// is visible here; confirm against the complete file before editing.
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  // Quickly eliminate it, if it's not a candidate.
  StringRef Name = F->getName();
  if (Name.size() <= 8 || !Name.startswith("llvm."))
  Name = Name.substr(5); // Strip off "llvm."

  // llvm.arm.neon.vclz.* is replaced by the target-independent llvm.ctlz.*
  // (which takes an extra i1 "is_zero_undef" operand).
  if (Name.startswith("arm.neon.vclz")) {
      F->arg_begin()->getType(),
      Type::getInt1Ty(F->getContext())
    // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
    // the end of the name. Change name from llvm.arm.neon.vclz.* to
    FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
    NewFn = Function::Create(fType, F->getLinkage(),
                             "llvm.ctlz." + Name.substr(14), F->getParent());
  // llvm.arm.neon.vcnt maps onto the generic llvm.ctpop intrinsic.
  if (Name.startswith("arm.neon.vcnt")) {
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                      F->arg_begin()->getType());

  // One-argument ctlz/cttz declarations predate the second (is_zero_undef)
  // argument; rename them aside and re-declare the current form.
  if (Name.startswith("ctlz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                      F->arg_begin()->getType());
  if (Name.startswith("cttz.") && F->arg_size() == 1) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
                                      F->arg_begin()->getType());

  // We only need to change the name to match the mangling including the
  if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
    Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
    if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
      F->setName(Name + ".old");
      NewFn = Intrinsic::getDeclaration(F->getParent(),
                                        Intrinsic::objectsize, Tys);

  // Each intrinsic matched below is expanded to plain IR (or a current
  // intrinsic) directly by UpgradeIntrinsicCall; no replacement declaration
  // is produced for them here.
  if (Name.startswith("x86.sse2.pcmpeq.") ||
      Name.startswith("x86.sse2.pcmpgt.") ||
      Name.startswith("x86.avx2.pcmpeq.") ||
      Name.startswith("x86.avx2.pcmpgt.") ||
      Name.startswith("x86.avx2.vbroadcast") ||
      Name.startswith("x86.avx2.pbroadcast") ||
      Name.startswith("x86.avx.vpermil.") ||
      Name == "x86.avx.vinsertf128.pd.256" ||
      Name == "x86.avx.vinsertf128.ps.256" ||
      Name == "x86.avx.vinsertf128.si.256" ||
      Name == "x86.avx2.vinserti128" ||
      Name == "x86.avx.vextractf128.pd.256" ||
      Name == "x86.avx.vextractf128.ps.256" ||
      Name == "x86.avx.vextractf128.si.256" ||
      Name == "x86.avx2.vextracti128" ||
      Name == "x86.avx.movnt.dq.256" ||
      Name == "x86.avx.movnt.pd.256" ||
      Name == "x86.avx.movnt.ps.256" ||
      Name == "x86.sse42.crc32.64.8" ||
      Name == "x86.avx.vbroadcast.ss" ||
      Name == "x86.avx.vbroadcast.ss.256" ||
      Name == "x86.avx.vbroadcast.sd.256" ||
      Name == "x86.sse2.psll.dq" ||
      Name == "x86.sse2.psrl.dq" ||
      Name == "x86.avx2.psll.dq" ||
      Name == "x86.avx2.psrl.dq" ||
      Name == "x86.sse2.psll.dq.bs" ||
      Name == "x86.sse2.psrl.dq.bs" ||
      Name == "x86.avx2.psll.dq.bs" ||
      Name == "x86.avx2.psrl.dq.bs" ||
      Name == "x86.sse41.pblendw" ||
      Name == "x86.sse41.blendpd" ||
      Name == "x86.sse41.blendps" ||
      Name == "x86.avx.blend.pd.256" ||
      Name == "x86.avx.blend.ps.256" ||
      Name == "x86.avx2.pblendw" ||
      Name == "x86.avx2.pblendd.128" ||
      Name == "x86.avx2.pblendd.256" ||
      Name == "x86.avx2.vbroadcasti128" ||
      (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {

  // SSE4.1 ptest functions may have an old signature.
  if (Name.startswith("x86.sse41.ptest")) {
    if (Name == "x86.sse41.ptestc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
    if (Name == "x86.sse41.ptestz")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
    if (Name == "x86.sse41.ptestnzc")
      return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);

  // Several blend and other instructions with masks used the wrong number of
  if (Name == "x86.sse41.insertps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
  if (Name == "x86.sse41.dppd")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
  if (Name == "x86.sse41.dpps")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
  if (Name == "x86.sse41.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
  if (Name == "x86.avx.dp.ps.256")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
  if (Name == "x86.avx2.mpsadbw")
    return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,

  // frcz.ss/sd may need to have an argument dropped
  if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_ss);
  if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
    F->setName(Name + ".old");
    NewFn = Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::x86_xop_vfrcz_sd);
  // Fix the FMA4 intrinsics to remove the 4
  if (Name.startswith("x86.fma4.")) {
    F->setName("llvm.x86.fma" + Name.substr(8));

  //  This may not belong here. This function is effectively being overloaded
  //  to both detect an intrinsic which needs upgrading, and to provide the
  //  upgraded form of the intrinsic. We should perhaps have two separate
  //  functions for this.
231 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
233 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
234 assert(F != NewFn && "Intrinsic function upgraded to the same function");
236 // Upgrade intrinsic attributes. This does not change the function.
239 if (Intrinsic::ID id = F->getIntrinsicID())
240 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
244 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
245 // Nothing to do yet.
249 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
251 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
252 Value *Op, unsigned NumLanes,
254 // Each lane is 16 bytes.
255 unsigned NumElts = NumLanes * 16;
257 // Bitcast from a 64-bit element type to a byte element type.
258 Op = Builder.CreateBitCast(Op,
259 VectorType::get(Type::getInt8Ty(C), NumElts),
261 // We'll be shuffling in zeroes.
262 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
264 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
265 // we'll just return the zero vector.
267 SmallVector<Constant*, 32> Idxs;
268 // 256-bit version is split into two 16-byte lanes.
269 for (unsigned l = 0; l != NumElts; l += 16)
270 for (unsigned i = 0; i != 16; ++i) {
271 unsigned Idx = NumElts + i - Shift;
273 Idx -= NumElts - 16; // end of lane, switch operand.
274 Idxs.push_back(Builder.getInt32(Idx + l));
277 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
280 // Bitcast back to a 64-bit element type.
281 return Builder.CreateBitCast(Res,
282 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
286 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
288 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
289 Value *Op, unsigned NumLanes,
291 // Each lane is 16 bytes.
292 unsigned NumElts = NumLanes * 16;
294 // Bitcast from a 64-bit element type to a byte element type.
295 Op = Builder.CreateBitCast(Op,
296 VectorType::get(Type::getInt8Ty(C), NumElts),
298 // We'll be shuffling in zeroes.
299 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
301 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
302 // we'll just return the zero vector.
304 SmallVector<Constant*, 32> Idxs;
305 // 256-bit version is split into two 16-byte lanes.
306 for (unsigned l = 0; l != NumElts; l += 16)
307 for (unsigned i = 0; i != 16; ++i) {
308 unsigned Idx = i + Shift;
310 Idx += NumElts - 16; // end of lane, switch operand.
311 Idxs.push_back(Builder.getInt32(Idx + l));
314 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
317 // Bitcast back to a 64-bit element type.
318 return Builder.CreateBitCast(Res,
319 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
//
// Two regimes (dispatched on NewFn in the full file): when no replacement
// declaration exists the old call is expanded to plain IR in the long
// name-keyed chain below; otherwise the switch at the bottom rebuilds the
// call against NewFn with the required argument adjustments.
// NOTE(review): this listing appears to be missing lines (the if (!NewFn)
// wrapper, the `Value *Rep;` declaration, several assignments, returns and
// braces) -- the annotations below describe only what is visible here;
// confirm against the complete file before editing.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
  Function *F = CI->getCalledFunction();
  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI);

  assert(F && "Intrinsic call is not direct?");

    // Get the Function's name.
    StringRef Name = F->getName();

    // Upgrade packed integer vector compares intrinsics to compare instructions
    if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
        Name.startswith("llvm.x86.avx2.pcmpeq.")) {
      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
               Name.startswith("llvm.x86.avx2.pcmpgt.")) {
      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
      // need to sign extend since icmp returns vector of i1
      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
    } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
               Name == "llvm.x86.avx.movnt.ps.256" ||
               Name == "llvm.x86.avx.movnt.pd.256") {
      // Non-temporal moves become an ordinary store tagged with
      // !nontemporal metadata.
      IRBuilder<> Builder(C);
      Builder.SetInsertPoint(CI->getParent(), CI);

      Module *M = F->getParent();
      SmallVector<Metadata *, 1> Elts;
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
      MDNode *Node = MDNode::get(C, Elts);

      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);

      // Convert the type of the pointer to a pointer to the stored type.
      Value *BC = Builder.CreateBitCast(Arg0,
                                        PointerType::getUnqual(Arg1->getType()),
      StoreInst *SI = Builder.CreateStore(Arg1, BC);
      SI->setMetadata(M->getMDKindID("nontemporal"), Node);
      SI->setAlignment(16);

      CI->eraseFromParent();
    } else if (Name.startswith("llvm.x86.xop.vpcom")) {
      // vpcom<cond><ty> pseudo-intrinsics become the real vpcom intrinsic
      // with an explicit immediate condition code.  First pick the element
      // type from the suffix...
      if (Name.endswith("ub"))
        intID = Intrinsic::x86_xop_vpcomub;
      else if (Name.endswith("uw"))
        intID = Intrinsic::x86_xop_vpcomuw;
      else if (Name.endswith("ud"))
        intID = Intrinsic::x86_xop_vpcomud;
      else if (Name.endswith("uq"))
        intID = Intrinsic::x86_xop_vpcomuq;
      else if (Name.endswith("b"))
        intID = Intrinsic::x86_xop_vpcomb;
      else if (Name.endswith("w"))
        intID = Intrinsic::x86_xop_vpcomw;
      else if (Name.endswith("d"))
        intID = Intrinsic::x86_xop_vpcomd;
      else if (Name.endswith("q"))
        intID = Intrinsic::x86_xop_vpcomq;
        llvm_unreachable("Unknown suffix");

      Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"

      // ...then derive the immediate condition code from the prefix.
      if (Name.startswith("lt"))
      else if (Name.startswith("le"))
      else if (Name.startswith("gt"))
      else if (Name.startswith("ge"))
      else if (Name.startswith("eq"))
      else if (Name.startswith("ne"))
      else if (Name.startswith("false"))
      else if (Name.startswith("true"))
        llvm_unreachable("Unknown condition");

      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
          Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     Builder.getInt8(Imm)});
    } else if (Name == "llvm.x86.sse42.crc32.64.8") {
      // The 64-bit crc32 over an i8 is the same computation as the 32-bit
      // form; truncate the accumulator in and zero-extend the result out.
      Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                               Intrinsic::x86_sse42_crc32_32_8);
      Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
      Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
      Rep = Builder.CreateZExt(Rep, CI->getType(), "");
    } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
      // Replace broadcasts with a series of insertelements.
      Type *VecTy = CI->getType();
      Type *EltTy = VecTy->getVectorElementType();
      unsigned EltNum = VecTy->getVectorNumElements();
      Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
                                          EltTy->getPointerTo());
      Value *Load = Builder.CreateLoad(EltTy, Cast);
      Type *I32Ty = Type::getInt32Ty(C);
      Rep = UndefValue::get(VecTy);
      for (unsigned I = 0; I < EltNum; ++I)
        Rep = Builder.CreateInsertElement(Rep, Load,
                                          ConstantInt::get(I32Ty, I));
    } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
      // Replace vbroadcasts with a vector shuffle.
      Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
      Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                            PointerType::getUnqual(VT));
      Value *Load = Builder.CreateLoad(VT, Op);
      const int Idxs[4] = { 0, 1, 0, 1 };
      Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
    } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
               Name.startswith("llvm.x86.avx2.vbroadcast")) {
      // Replace vp?broadcasts with a vector shuffle.
      Value *Op = CI->getArgOperand(0);
      unsigned NumElts = CI->getType()->getVectorNumElements();
      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
                                        Constant::getNullValue(MaskTy));
    } else if (Name == "llvm.x86.sse2.psll.dq") {
      // 128-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psrl.dq") {
      // 128-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psll.dq") {
      // 256-bit shift left specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.avx2.psrl.dq") {
      // 256-bit shift right specified in bits.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
                                       Shift / 8); // Shift is in bits.
    } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
      // 128-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
    } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
      // 128-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
    } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
      // 256-bit shift left specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
    } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
      // 256-bit shift right specified in bytes.
      unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
    } else if (Name == "llvm.x86.sse41.pblendw" ||
               Name == "llvm.x86.sse41.blendpd" ||
               Name == "llvm.x86.sse41.blendps" ||
               Name == "llvm.x86.avx.blend.pd.256" ||
               Name == "llvm.x86.avx.blend.ps.256" ||
               Name == "llvm.x86.avx2.pblendw" ||
               Name == "llvm.x86.avx2.pblendd.128" ||
               Name == "llvm.x86.avx2.pblendd.256") {
      // Immediate blends become a two-operand shuffle: mask bit i selects
      // element i from Op1 (index i+NumElts) or Op0 (index i).
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      SmallVector<Constant*, 16> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
        Idxs.push_back(Builder.getInt32(Idx));

      Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
    } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
               Name == "llvm.x86.avx.vinsertf128.ps.256" ||
               Name == "llvm.x86.avx.vinsertf128.si.256" ||
               Name == "llvm.x86.avx2.vinserti128") {
      Value *Op0 = CI->getArgOperand(0);
      Value *Op1 = CI->getArgOperand(1);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.

      // Extend the second operand into a vector that is twice as big.
      Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<Constant*, 8> Idxs;
      for (unsigned i = 0; i != NumElts; ++i) {
        Idxs.push_back(Builder.getInt32(i));

      Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));

      // Insert the second operand into the first operand.

      // Note that there is no guarantee that instruction lowering will actually
      // produce a vinsertf128 instruction for the created shuffles. In
      // particular, the 0 immediate case involves no lane changes, so it can
      // be handled as a blend.

      // Example of shuffle mask for 32-bit elements:
      // Imm = 1  <i32 0, i32 1, i32 2,  i32 3,  i32 8, i32 9, i32 10, i32 11>
      // Imm = 0  <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6,  i32 7 >

      SmallVector<Constant*, 8> Idxs2;
      // The low half of the result is either the low half of the 1st operand
      // or the low half of the 2nd operand (the inserted vector).
      for (unsigned i = 0; i != NumElts / 2; ++i) {
        unsigned Idx = Imm ? i : (i + NumElts);
        Idxs2.push_back(Builder.getInt32(Idx));
      // The high half of the result is either the low half of the 2nd operand
      // (the inserted vector) or the high half of the 1st operand.
      for (unsigned i = NumElts / 2; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts / 2) : i;
        Idxs2.push_back(Builder.getInt32(Idx));
      Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
    } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
               Name == "llvm.x86.avx.vextractf128.ps.256" ||
               Name == "llvm.x86.avx.vextractf128.si.256" ||
               Name == "llvm.x86.avx2.vextracti128") {
      Value *Op0 = CI->getArgOperand(0);
      unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      VectorType *VecTy = cast<VectorType>(CI->getType());
      unsigned NumElts = VecTy->getNumElements();

      // Mask off the high bits of the immediate value; hardware ignores those.

      // Get indexes for either the high half or low half of the input vector.
      SmallVector<Constant*, 4> Idxs(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        unsigned Idx = Imm ? (i + NumElts) : i;
        Idxs[i] = Builder.getInt32(Idx);

      Value *UndefV = UndefValue::get(Op0->getType());
      Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
      // vpermil immediates become single-operand shuffles; flags record
      // which element size / vector width variant we are expanding.
      bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
      if (Name == "llvm.x86.avx.vpermil.pd.256")
      else if (Name == "llvm.x86.avx.vpermil.pd")
      else if (Name == "llvm.x86.avx.vpermil.ps.256")
      else if (Name == "llvm.x86.avx.vpermil.ps")

      if (PD256 || PD128 || PS256 || PS128) {
        Value *Op0 = CI->getArgOperand(0);
        unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
        SmallVector<Constant*, 8> Idxs;

          for (unsigned i = 0; i != 2; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
          for (unsigned l = 0; l != 4; l+=2)
            for (unsigned i = 0; i != 2; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
          for (unsigned i = 0; i != 4; ++i)
            Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
          for (unsigned l = 0; l != 8; l+=4)
            for (unsigned i = 0; i != 4; ++i)
              Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
          llvm_unreachable("Unexpected function");

        Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
        llvm_unreachable("Unknown function for CallInst upgrade.");

    CI->replaceAllUsesWith(Rep);
    CI->eraseFromParent();

  // From here on a replacement declaration (NewFn) exists: rebuild the call
  // against it, adjusting arguments as each intrinsic requires.
  std::string Name = CI->getName();
    CI->setName(Name + ".old");

  switch (NewFn->getIntrinsicID()) {
    llvm_unreachable("Unknown function for CallInst upgrade.");

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // The upgraded form takes a second i1 "is_zero_undef" operand; pass
    // false to preserve the old defined-at-zero behaviour.
    assert(CI->getNumArgOperands() == 1 &&
           "Mismatch between function args and call args");
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
    CI->eraseFromParent();

  case Intrinsic::objectsize:
    CI->replaceAllUsesWith(Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();

  case Intrinsic::ctpop: {
    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
    CI->eraseFromParent();

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // The old form took two operands; only the second is meaningful.
    CI->replaceAllUsesWith(
        Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
    CI->eraseFromParent();

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                 CI->arg_operands().end());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");

    CallInst *NewCall = Builder.CreateCall(NewFn, Args);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
715 // This tests each Function to determine if it needs upgrading. When we find
716 // one we are interested in, we then upgrade all calls to reflect the new
718 void llvm::UpgradeCallsToIntrinsic(Function* F) {
719 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
721 // Upgrade the function and check if it is a totaly new function.
723 if (UpgradeIntrinsicFunction(F, NewFn)) {
724 // Replace all uses to the old function with the new one if necessary.
725 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
727 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
728 UpgradeIntrinsicCall(CI, NewFn);
730 // Remove old function, no longer used, from the module.
731 F->eraseFromParent();
735 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
736 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
737 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
738 // Check if the tag uses struct-path aware TBAA format.
739 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
742 if (MD->getNumOperands() == 3) {
743 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
744 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
745 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
746 Metadata *Elts2[] = {ScalarType, ScalarType,
747 ConstantAsMetadata::get(Constant::getNullValue(
748 Type::getInt64Ty(I->getContext()))),
750 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
752 // Create a MDNode <MD, MD, offset 0>
753 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
754 Type::getInt64Ty(I->getContext())))};
755 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
759 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
760 Instruction *&Temp) {
761 if (Opc != Instruction::BitCast)
765 Type *SrcTy = V->getType();
766 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
767 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
768 LLVMContext &Context = V->getContext();
770 // We have no information about target data layout, so we assume that
771 // the maximum pointer size is 64bit.
772 Type *MidTy = Type::getInt64Ty(Context);
773 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
775 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
781 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
782 if (Opc != Instruction::BitCast)
785 Type *SrcTy = C->getType();
786 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
787 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
788 LLVMContext &Context = C->getContext();
790 // We have no information about target data layout, so we assume that
791 // the maximum pointer size is 64bit.
792 Type *MidTy = Type::getInt64Ty(Context);
794 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
801 /// Check the debug info version number, if it is out-dated, drop the debug
802 /// info. Return true if module is modified.
803 bool llvm::UpgradeDebugInfo(Module &M) {
804 unsigned Version = getDebugMetadataVersionFromModule(M);
805 if (Version == DEBUG_METADATA_VERSION)
808 bool RetCode = StripDebugInfo(M);
810 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
811 M.getContext().diagnose(DiagVersion);
816 void llvm::UpgradeMDStringConstant(std::string &String) {
817 const std::string OldPrefix = "llvm.vectorizer.";
818 if (String == "llvm.vectorizer.unroll") {
819 String = "llvm.loop.interleave.count";
820 } else if (String.find(OldPrefix) == 0) {
821 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");