1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
//
// F is the existing declaration and IID the intrinsic to re-declare; NewFn
// receives the declaration fetched for IID.
// NOTE(review): the end of the signature and the return statements are not
// present in this listing (the embedded numbering skips) -- confirm the
// returned values against the complete file.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
37 // Check whether this is an old version of the function, which received
// The test below compares the type of parameter 0 with a 4-element float
// vector type built in F's context.
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43 // Yes, it's old, replace it with new version.
// The old declaration is renamed with a ".old" suffix, then the declaration
// for IID is requested from F's parent.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
//
// F is the existing declaration and IID the intrinsic to re-declare; NewFn
// receives the declaration fetched for IID.
// NOTE(review): the end of the signature and the return statements are not
// present in this listing (the embedded numbering skips) -- confirm the
// returned values against the complete file.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
53 // Check that the last argument is an i32.
// The last parameter is addressed as index getNumParams() - 1.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
// "Aside" means renaming F with a ".old" suffix; the declaration for IID is
// then requested from F's parent.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Inspects the name of F and, for the outdated intrinsic declarations
// recognized below, prepares the upgrade. Depending on the case it renames F,
// stores a replacement declaration in NewFn, or hands off to
// UpgradeSSE41Function / UpgradeX86IntrinsicsWith8BitMask.
// NOTE(review): several lines of this function (including most return
// statements) are not present in this listing, so the value returned by each
// branch cannot be read off here -- confirm against the complete file.
65 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
66 assert(F && "Illegal to upgrade a non-existent Function.");
68 // Quickly eliminate it, if it's not a candidate.
// Candidates must be longer than 8 characters and begin with "llvm.".
69 StringRef Name = F->getName();
70 if (Name.size() <= 8 || !Name.startswith("llvm."))
72 Name = Name.substr(5); // Strip off "llvm."
// From here on Name is compared without its "llvm." prefix.
77 if (Name.startswith("arm.neon.vclz")) {
// The two types below are F's first parameter type and i1; presumably they
// initialize `args`, whose declaration is not present in this listing.
79 F->arg_begin()->getType(),
80 Type::getInt1Ty(F->getContext())
82 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
83 // the end of the name. Change name from llvm.arm.neon.vclz.* to
// "arm.neon.vclz." is 14 characters, so Name.substr(14) keeps only what
// follows it and appends that to "llvm.ctlz.".
85 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
86 NewFn = Function::Create(fType, F->getLinkage(),
87 "llvm.ctlz." + Name.substr(14), F->getParent());
// vcnt maps onto the ctpop intrinsic, overloaded on F's first parameter type.
90 if (Name.startswith("arm.neon.vcnt")) {
91 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
92 F->arg_begin()->getType());
// One-argument ctlz/cttz declarations are renamed and re-declared.
// Name no longer carries "llvm." here, so the renamed function is
// Name + ".old" without that prefix.
98 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
99 F->setName(Name + ".old");
100 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
101 F->arg_begin()->getType());
104 if (Name.startswith("cttz.") && F->arg_size() == 1) {
105 F->setName(Name + ".old");
106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
107 F->arg_begin()->getType());
114 // We only need to change the name to match the mangling including the
// The expected name is computed from the return type and the first
// parameter type; F is replaced only when its current name differs.
116 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
117 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
118 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
119 F->setName(Name + ".old");
120 NewFn = Intrinsic::getDeclaration(F->getParent(),
121 Intrinsic::objectsize, Tys);
// The x86 intrinsics listed in this condition are the ones that
// UpgradeIntrinsicCall rewrites by matching on the callee name. The body of
// this branch is not present in this listing.
128 if (Name.startswith("x86.sse2.pcmpeq.") ||
129 Name.startswith("x86.sse2.pcmpgt.") ||
130 Name.startswith("x86.avx2.pcmpeq.") ||
131 Name.startswith("x86.avx2.pcmpgt.") ||
132 Name.startswith("x86.avx.vpermil.") ||
133 Name == "x86.avx.vinsertf128.pd.256" ||
134 Name == "x86.avx.vinsertf128.ps.256" ||
135 Name == "x86.avx.vinsertf128.si.256" ||
136 Name == "x86.avx2.vinserti128" ||
137 Name == "x86.avx.vextractf128.pd.256" ||
138 Name == "x86.avx.vextractf128.ps.256" ||
139 Name == "x86.avx.vextractf128.si.256" ||
140 Name == "x86.avx2.vextracti128" ||
141 Name == "x86.avx.movnt.dq.256" ||
142 Name == "x86.avx.movnt.pd.256" ||
143 Name == "x86.avx.movnt.ps.256" ||
144 Name == "x86.sse42.crc32.64.8" ||
145 Name == "x86.avx.vbroadcast.ss" ||
146 Name == "x86.avx.vbroadcast.ss.256" ||
147 Name == "x86.avx.vbroadcast.sd.256" ||
148 Name == "x86.sse2.psll.dq" ||
149 Name == "x86.sse2.psrl.dq" ||
150 Name == "x86.avx2.psll.dq" ||
151 Name == "x86.avx2.psrl.dq" ||
152 Name == "x86.sse2.psll.dq.bs" ||
153 Name == "x86.sse2.psrl.dq.bs" ||
154 Name == "x86.avx2.psll.dq.bs" ||
155 Name == "x86.avx2.psrl.dq.bs" ||
156 Name == "x86.sse41.pblendw" ||
157 Name == "x86.sse41.blendpd" ||
158 Name == "x86.sse41.blendps" ||
159 Name == "x86.avx.blend.pd.256" ||
160 Name == "x86.avx.blend.ps.256" ||
161 Name == "x86.avx2.pblendw" ||
162 Name == "x86.avx2.pblendd.128" ||
163 Name == "x86.avx2.pblendd.256" ||
164 Name == "x86.avx2.vbroadcasti128" ||
165 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
169 // SSE4.1 ptest functions may have an old signature.
// Each exact ptest name is forwarded to UpgradeSSE41Function with the
// matching intrinsic ID; that helper's result is returned directly.
170 if (Name.startswith("x86.sse41.ptest")) {
171 if (Name == "x86.sse41.ptestc")
172 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
173 if (Name == "x86.sse41.ptestz")
174 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
175 if (Name == "x86.sse41.ptestnzc")
176 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
178 // Several blend and other instructions with masks used the wrong number of
// Each name below is forwarded to UpgradeX86IntrinsicsWith8BitMask with the
// matching intrinsic ID (the trailing argument lines are not in this listing).
180 if (Name == "x86.sse41.insertps")
181 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
183 if (Name == "x86.sse41.dppd")
184 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
186 if (Name == "x86.sse41.dpps")
187 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
189 if (Name == "x86.sse41.mpsadbw")
190 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
192 if (Name == "x86.avx.dp.ps.256")
193 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
195 if (Name == "x86.avx2.mpsadbw")
196 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
199 // frcz.ss/sd may need to have an argument dropped
// Only the two-argument forms are renamed and re-declared.
200 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
201 F->setName(Name + ".old");
202 NewFn = Intrinsic::getDeclaration(F->getParent(),
203 Intrinsic::x86_xop_vfrcz_ss);
206 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
207 F->setName(Name + ".old");
208 NewFn = Intrinsic::getDeclaration(F->getParent(),
209 Intrinsic::x86_xop_vfrcz_sd);
212 // Fix the FMA4 intrinsics to remove the 4
// "x86.fma4" is 8 characters, so Name.substr(8) is everything after it; the
// function is renamed in place to "llvm.x86.fma" followed by that remainder.
213 if (Name.startswith("x86.fma4.")) {
214 F->setName("llvm.x86.fma" + Name.substr(8));
222 // This may not belong here. This function is effectively being overloaded
223 // to both detect an intrinsic which needs upgrading, and to provide the
224 // upgraded form of the intrinsic. We should perhaps have two separate
225 // functions for this.
// Public entry point: runs UpgradeIntrinsicFunction1 on F and then refreshes
// intrinsic attributes.
// NOTE(review): lines between the visible statements (and the return) are not
// present in this listing -- confirm against the complete file.
229 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
231 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
233 // Upgrade intrinsic attributes. This does not change the function.
// Only applies when getIntrinsicID() is nonzero; the attributes are replaced
// with what Intrinsic::getAttributes reports for that ID.
236 if (Intrinsic::ID id = F->getIntrinsicID())
237 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
// Upgrade hook for a global variable. The only visible content is the
// placeholder comment below; the return statement is not present in this
// listing.
241 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
242 // Nothing to do yet.
246 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
//
// Op is the vector to shift and NumLanes the number of 16-byte lanes in it.
// `Shift` is used below but its declaration (presumably the last parameter)
// is not present in this listing. The result is cast back to a vector of
// 2 * NumLanes 64-bit integers.
248 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
249 Value *Op, unsigned NumLanes,
251 // Each lane is 16 bytes.
252 unsigned NumElts = NumLanes * 16;
254 // Bitcast from a 64-bit element type to a byte element type.
255 Op = Builder.CreateBitCast(Op,
256 VectorType::get(Type::getInt8Ty(C), NumElts),
258 // We'll be shuffling in zeroes.
259 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
261 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
262 // we'll just return the zero vector.
264 SmallVector<Constant*, 32> Idxs;
265 // 256-bit version is split into two 16-byte lanes.
// l is the byte offset of the current lane, i the byte position within it.
266 for (unsigned l = 0; l != NumElts; l += 16)
267 for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes (Res, Op), so indices offset by NumElts address
// Op and lower indices address the zero vector.
268 unsigned Idx = NumElts + i - Shift;
// The condition guarding the next adjustment is not present in this listing.
270 Idx -= NumElts - 16; // end of lane, switch operand.
271 Idxs.push_back(Builder.getInt32(Idx + l));
274 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
277 // Bitcast back to a 64-bit element type.
278 return Builder.CreateBitCast(Res,
279 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
283 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
//
// Op is the vector to shift and NumLanes the number of 16-byte lanes in it.
// `Shift` is used below but its declaration (presumably the last parameter)
// is not present in this listing. The result is cast back to a vector of
// 2 * NumLanes 64-bit integers.
285 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
286 Value *Op, unsigned NumLanes,
288 // Each lane is 16 bytes.
289 unsigned NumElts = NumLanes * 16;
291 // Bitcast from a 64-bit element type to a byte element type.
292 Op = Builder.CreateBitCast(Op,
293 VectorType::get(Type::getInt8Ty(C), NumElts),
295 // We'll be shuffling in zeroes.
296 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
298 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
299 // we'll just return the zero vector.
301 SmallVector<Constant*, 32> Idxs;
302 // 256-bit version is split into two 16-byte lanes.
// l is the byte offset of the current lane, i the byte position within it.
303 for (unsigned l = 0; l != NumElts; l += 16)
304 for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes (Op, Res), so low indices address Op and indices
// offset by NumElts address the zero vector.
305 unsigned Idx = i + Shift;
// The condition guarding the next adjustment is not present in this listing.
307 Idx += NumElts - 16; // end of lane, switch operand.
308 Idxs.push_back(Builder.getInt32(Idx + l));
311 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
314 // Bitcast back to a 64-bit element type.
315 return Builder.CreateBitCast(Res,
316 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
320 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
321 // upgraded intrinsic. All argument and return casting must be provided in
322 // order to seamlessly integrate with existing context.
//
// CI is the call to rewrite and NewFn the replacement declaration. The first
// half of the function builds a replacement value `Rep` by matching the
// callee's name; the second half switches on NewFn's intrinsic ID and emits a
// call to NewFn. In both halves the old call is erased after its uses are
// redirected.
// NOTE(review): many lines (declarations of Rep/Imm/intID, the test that
// separates the two halves, returns, breaks and closing braces) are not
// present in this listing -- confirm control flow against the complete file.
323 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
324 Function *F = CI->getCalledFunction();
325 LLVMContext &C = CI->getContext();
// New instructions are inserted at the position of the old call.
326 IRBuilder<> Builder(C);
327 Builder.SetInsertPoint(CI->getParent(), CI);
329 assert(F && "Intrinsic call is not direct?");
332 // Get the Function's name.
333 StringRef Name = F->getName();
336 // Upgrade packed integer vector compares intrinsics to compare instructions
337 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
338 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
339 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
341 // need to sign extend since icmp returns vector of i1
342 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
343 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
344 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
// Greater-than uses the signed comparison.
345 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
347 // need to sign extend since icmp returns vector of i1
348 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
349 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
350 Name == "llvm.x86.avx.movnt.ps.256" ||
351 Name == "llvm.x86.avx.movnt.pd.256") {
// movnt becomes a store tagged with "nontemporal" metadata.
// This Builder shadows the outer one; it is set to the same insert point.
352 IRBuilder<> Builder(C);
353 Builder.SetInsertPoint(CI->getParent(), CI);
355 Module *M = F->getParent();
// The metadata node holds a single i32 constant 1.
356 SmallVector<Metadata *, 1> Elts;
358 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
359 MDNode *Node = MDNode::get(C, Elts);
// Arg0 is used as the destination pointer, Arg1 as the value stored.
361 Value *Arg0 = CI->getArgOperand(0);
362 Value *Arg1 = CI->getArgOperand(1);
364 // Convert the type of the pointer to a pointer to the stored type.
365 Value *BC = Builder.CreateBitCast(Arg0,
366 PointerType::getUnqual(Arg1->getType()),
368 StoreInst *SI = Builder.CreateStore(Arg1, BC);
369 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
370 SI->setAlignment(16);
// The call is erased here without a replaceAllUsesWith; this branch does not
// go through the shared Rep replacement further down.
373 CI->eraseFromParent();
375 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
// The element-type suffix picks the target intrinsic. The unsigned suffixes
// ("ub", "uw", ...) are tested first because endswith("b") etc. would also
// match them.
377 if (Name.endswith("ub"))
378 intID = Intrinsic::x86_xop_vpcomub;
379 else if (Name.endswith("uw"))
380 intID = Intrinsic::x86_xop_vpcomuw;
381 else if (Name.endswith("ud"))
382 intID = Intrinsic::x86_xop_vpcomud;
383 else if (Name.endswith("uq"))
384 intID = Intrinsic::x86_xop_vpcomuq;
385 else if (Name.endswith("b"))
386 intID = Intrinsic::x86_xop_vpcomb;
387 else if (Name.endswith("w"))
388 intID = Intrinsic::x86_xop_vpcomw;
389 else if (Name.endswith("d"))
390 intID = Intrinsic::x86_xop_vpcomd;
391 else if (Name.endswith("q"))
392 intID = Intrinsic::x86_xop_vpcomq;
394 llvm_unreachable("Unknown suffix");
396 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// What remains starts with the condition mnemonic. The statement executed for
// each match (presumably assigning Imm, which is used below) is not present
// in this listing.
398 if (Name.startswith("lt"))
400 else if (Name.startswith("le"))
402 else if (Name.startswith("gt"))
404 else if (Name.startswith("ge"))
406 else if (Name.startswith("eq"))
408 else if (Name.startswith("ne"))
410 else if (Name.startswith("false"))
412 else if (Name.startswith("true"))
415 llvm_unreachable("Unknown condition");
// The new call passes the two original operands plus Imm as an i8 constant.
417 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
419 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1),
420 Builder.getInt8(Imm)});
421 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
// Rewritten as the crc32.32.8 intrinsic: operand 0 is truncated to i32 and
// the result zero-extended to the old call's type.
422 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
423 Intrinsic::x86_sse42_crc32_32_8);
424 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
425 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
426 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
427 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
428 // Replace broadcasts with a series of insertelements.
// One element is loaded through operand 0 (cast to a pointer to the element
// type) and inserted at every index of an initially undef vector.
429 Type *VecTy = CI->getType();
430 Type *EltTy = VecTy->getVectorElementType();
431 unsigned EltNum = VecTy->getVectorNumElements();
432 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
433 EltTy->getPointerTo());
434 Value *Load = Builder.CreateLoad(EltTy, Cast);
435 Type *I32Ty = Type::getInt32Ty(C);
436 Rep = UndefValue::get(VecTy);
437 for (unsigned I = 0; I < EltNum; ++I)
438 Rep = Builder.CreateInsertElement(Rep, Load,
439 ConstantInt::get(I32Ty, I));
440 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
441 // Replace vbroadcasts with a vector shuffle.
// A 2 x i64 vector is loaded and shuffled with indices {0, 1, 0, 1}.
442 Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
443 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
444 PointerType::getUnqual(VT));
445 Value *Load = Builder.CreateLoad(VT, Op);
446 const int Idxs[4] = { 0, 1, 0, 1 };
447 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
// The psll/psrl.dq branches below read the shift amount from operand 1, which
// must be a ConstantInt (cast<> is used), and pass 1 or 2 as the lane count.
449 } else if (Name == "llvm.x86.sse2.psll.dq") {
450 // 128-bit shift left specified in bits.
451 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
452 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
453 Shift / 8); // Shift is in bits.
454 } else if (Name == "llvm.x86.sse2.psrl.dq") {
455 // 128-bit shift right specified in bits.
456 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
457 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
458 Shift / 8); // Shift is in bits.
459 } else if (Name == "llvm.x86.avx2.psll.dq") {
460 // 256-bit shift left specified in bits.
461 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
462 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
463 Shift / 8); // Shift is in bits.
464 } else if (Name == "llvm.x86.avx2.psrl.dq") {
465 // 256-bit shift right specified in bits.
466 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
467 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
468 Shift / 8); // Shift is in bits.
// For the ".bs" variants the final argument line of each helper call is not
// present in this listing.
469 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
470 // 128-bit shift left specified in bytes.
471 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
472 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
474 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
475 // 128-bit shift right specified in bytes.
476 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
477 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
479 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
480 // 256-bit shift left specified in bytes.
481 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
482 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
484 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
485 // 256-bit shift right specified in bytes.
486 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
487 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
489 } else if (Name == "llvm.x86.sse41.pblendw" ||
490 Name == "llvm.x86.sse41.blendpd" ||
491 Name == "llvm.x86.sse41.blendps" ||
492 Name == "llvm.x86.avx.blend.pd.256" ||
493 Name == "llvm.x86.avx.blend.ps.256" ||
494 Name == "llvm.x86.avx2.pblendw" ||
495 Name == "llvm.x86.avx2.pblendd.128" ||
496 Name == "llvm.x86.avx2.pblendd.256") {
// Blends become a two-operand shuffle driven by the constant mask in
// operand 2.
497 Value *Op0 = CI->getArgOperand(0);
498 Value *Op1 = CI->getArgOperand(1);
499 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
500 VectorType *VecTy = cast<VectorType>(CI->getType());
501 unsigned NumElts = VecTy->getNumElements();
503 SmallVector<Constant*, 16> Idxs;
504 for (unsigned i = 0; i != NumElts; ++i) {
// Mask bit (i % 8) set: element i of Op1 (index i + NumElts); clear: element
// i of Op0. The modulo reuses the 8 mask bits when NumElts exceeds 8.
505 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
506 Idxs.push_back(Builder.getInt32(Idx));
509 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
510 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
511 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
512 Name == "llvm.x86.avx.vinsertf128.si.256" ||
513 Name == "llvm.x86.avx2.vinserti128") {
514 Value *Op0 = CI->getArgOperand(0);
515 Value *Op1 = CI->getArgOperand(1);
516 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
517 VectorType *VecTy = cast<VectorType>(CI->getType());
518 unsigned NumElts = VecTy->getNumElements();
520 // Mask off the high bits of the immediate value; hardware ignores those.
// (The masking statement itself is not present in this listing.)
523 // Extend the second operand into a vector that is twice as big.
// Op1 is shuffled with an undef vector using indices 0 .. NumElts-1.
524 Value *UndefV = UndefValue::get(Op1->getType());
525 SmallVector<Constant*, 8> Idxs;
526 for (unsigned i = 0; i != NumElts; ++i) {
527 Idxs.push_back(Builder.getInt32(i));
529 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
531 // Insert the second operand into the first operand.
533 // Note that there is no guarantee that instruction lowering will actually
534 // produce a vinsertf128 instruction for the created shuffles. In
535 // particular, the 0 immediate case involves no lane changes, so it can
536 // be handled as a blend.
538 // Example of shuffle mask for 32-bit elements:
539 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
540 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
542 SmallVector<Constant*, 8> Idxs2;
543 // The low half of the result is either the low half of the 1st operand
544 // or the low half of the 2nd operand (the inserted vector).
545 for (unsigned i = 0; i != NumElts / 2; ++i) {
546 unsigned Idx = Imm ? i : (i + NumElts);
547 Idxs2.push_back(Builder.getInt32(Idx));
549 // The high half of the result is either the low half of the 2nd operand
550 // (the inserted vector) or the high half of the 1st operand.
551 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
552 unsigned Idx = Imm ? (i + NumElts / 2) : i;
553 Idxs2.push_back(Builder.getInt32(Idx));
// Final shuffle: Op0 is the first operand, the widened Op1 (Rep) the second.
555 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
556 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
557 Name == "llvm.x86.avx.vextractf128.ps.256" ||
558 Name == "llvm.x86.avx.vextractf128.si.256" ||
559 Name == "llvm.x86.avx2.vextracti128") {
560 Value *Op0 = CI->getArgOperand(0);
561 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
// NumElts is the element count of the call's result type.
562 VectorType *VecTy = cast<VectorType>(CI->getType());
563 unsigned NumElts = VecTy->getNumElements();
565 // Mask off the high bits of the immediate value; hardware ignores those.
// (The masking statement itself is not present in this listing.)
568 // Get indexes for either the high half or low half of the input vector.
569 SmallVector<Constant*, 4> Idxs(NumElts);
570 for (unsigned i = 0; i != NumElts; ++i) {
571 unsigned Idx = Imm ? (i + NumElts) : i;
572 Idxs[i] = Builder.getInt32(Idx);
575 Value *UndefV = UndefValue::get(Op0->getType());
576 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// vpermil: one of the four flags is presumably set per matching name; the
// assignments and the conditions selecting each index loop below are not
// present in this listing.
578 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
579 if (Name == "llvm.x86.avx.vpermil.pd.256")
581 else if (Name == "llvm.x86.avx.vpermil.pd")
583 else if (Name == "llvm.x86.avx.vpermil.ps.256")
585 else if (Name == "llvm.x86.avx.vpermil.ps")
588 if (PD256 || PD128 || PS256 || PS128) {
589 Value *Op0 = CI->getArgOperand(0);
590 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
591 SmallVector<Constant*, 8> Idxs;
// Two indices, one mask bit each.
594 for (unsigned i = 0; i != 2; ++i)
595 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
// Four indices in two groups of two; each index is one mask bit plus the
// group offset l.
597 for (unsigned l = 0; l != 4; l+=2)
598 for (unsigned i = 0; i != 2; ++i)
599 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
// Four indices, two mask bits each.
601 for (unsigned i = 0; i != 4; ++i)
602 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
// Eight indices in two groups of four; both groups read the same mask bits
// and the second adds the offset l = 4.
604 for (unsigned l = 0; l != 8; l+=4)
605 for (unsigned i = 0; i != 4; ++i)
606 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
608 llvm_unreachable("Unexpected function");
// Both shuffle operands are Op0.
610 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
612 llvm_unreachable("Unknown function for CallInst upgrade.");
// Shared tail of the name-matched branches: redirect uses to Rep and drop
// the old call.
616 CI->replaceAllUsesWith(Rep);
617 CI->eraseFromParent();
// From here on the upgrade goes through NewFn. The old call's name is saved
// and the call renamed with ".old" so that most replacement calls below can
// take the saved name.
621 std::string Name = CI->getName();
623 CI->setName(Name + ".old");
625 switch (NewFn->getIntrinsicID()) {
627 llvm_unreachable("Unknown function for CallInst upgrade.");
629 case Intrinsic::ctlz:
630 case Intrinsic::cttz:
// The old form took one operand; the new call adds a constant false as the
// second operand.
631 assert(CI->getNumArgOperands() == 1 &&
632 "Mismatch between function args and call args");
633 CI->replaceAllUsesWith(Builder.CreateCall(
634 NewFn, {CI->getArgOperand(0), Builder.getFalse()}, Name));
635 CI->eraseFromParent();
638 case Intrinsic::objectsize:
// Same two operands, new callee.
639 CI->replaceAllUsesWith(Builder.CreateCall(
640 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)}, Name));
641 CI->eraseFromParent();
644 case Intrinsic::ctpop: {
// Unlike the other cases, the saved Name is not passed to the new call.
645 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, {CI->getArgOperand(0)}));
646 CI->eraseFromParent();
650 case Intrinsic::x86_xop_vfrcz_ss:
651 case Intrinsic::x86_xop_vfrcz_sd:
// Only operand 1 is forwarded; operand 0 of the old call is dropped.
652 CI->replaceAllUsesWith(
653 Builder.CreateCall(NewFn, {CI->getArgOperand(1)}, Name));
654 CI->eraseFromParent();
657 case Intrinsic::x86_sse41_ptestc:
658 case Intrinsic::x86_sse41_ptestz:
659 case Intrinsic::x86_sse41_ptestnzc: {
660 // The arguments for these intrinsics used to be v4f32, and changed
661 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
662 // So, the only thing required is a bitcast for both arguments.
663 // First, check the arguments have the old type.
664 Value *Arg0 = CI->getArgOperand(0);
665 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
668 // Old intrinsic, add bitcasts
669 Value *Arg1 = CI->getArgOperand(1);
671 Type *NewVecTy = VectorType::get(Type::getInt64Ty(C), 2);
673 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
674 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
676 CallInst *NewCall = Builder.CreateCall(NewFn, {BC0, BC1}, Name);
677 CI->replaceAllUsesWith(NewCall);
678 CI->eraseFromParent();
682 case Intrinsic::x86_sse41_insertps:
683 case Intrinsic::x86_sse41_dppd:
684 case Intrinsic::x86_sse41_dpps:
685 case Intrinsic::x86_sse41_mpsadbw:
686 case Intrinsic::x86_avx_dp_ps_256:
687 case Intrinsic::x86_avx2_mpsadbw: {
688 // Need to truncate the last argument from i32 to i8 -- this argument models
689 // an inherently 8-bit immediate operand to these x86 instructions.
// All operands are copied so that only the last one needs replacing.
690 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
691 CI->arg_operands().end());
693 // Replace the last argument with a trunc.
694 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
696 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
697 CI->replaceAllUsesWith(NewCall);
698 CI->eraseFromParent();
704 // This tests each Function to determine if it needs upgrading. When we find
705 // one we are interested in, we then upgrade all calls to reflect the new
//
// NOTE(review): the declaration of NewFn, part of the loop header and the
// closing lines are not present in this listing -- confirm against the
// complete file.
707 void llvm::UpgradeCallsToIntrinsic(Function* F) {
708 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
710 // Upgrade the function and check if it is a totally new function.
712 if (UpgradeIntrinsicFunction(F, NewFn)) {
714 // Replace all uses to the old function with the new one if necessary.
715 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
// The iterator is advanced (UI++) before the call is upgraded, because
// UpgradeIntrinsicCall erases the call instruction it is given. Users that
// are not CallInsts are skipped.
717 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
718 UpgradeIntrinsicCall(CI, NewFn);
720 // Remove old function, no longer used, from the module.
721 F->eraseFromParent();
// Rewrites the MD_tbaa metadata attached to I into a node whose first two
// operands are type nodes followed by a zero i64 offset. I must already carry
// a TBAA tag (asserted below).
// NOTE(review): the early return, the else keyword and the closing lines are
// not present in this listing -- confirm against the complete file.
726 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
727 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
728 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
729 // Check if the tag uses struct-path aware TBAA format.
// The test is: operand 0 is itself an MDNode and there are at least three
// operands.
730 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
733 if (MD->getNumOperands() == 3) {
// Three-operand tag: the first two operands form a new scalar-type node.
734 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
735 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
736 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
// The fourth element of Elts2 is on a line not present in this listing.
737 Metadata *Elts2[] = {ScalarType, ScalarType,
738 ConstantAsMetadata::get(Constant::getNullValue(
739 Type::getInt64Ty(I->getContext()))),
741 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
743 // Create a MDNode <MD, MD, offset 0>
// The existing tag is used as both type operands; the offset is a zero i64.
744 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
745 Type::getInt64Ty(I->getContext())))};
746 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// Replaces a bitcast between pointer (or pointer-vector) types in different
// address spaces with a ptrtoint followed by an inttoptr.
//
// Opc is the cast opcode, V the source value and DestTy the target type. Temp
// receives the intermediate ptrtoint instruction, and the inttoptr
// instruction is returned.
// NOTE(review): what is returned when no rewrite applies is on lines not
// present in this listing -- confirm against the complete file.
750 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
751 Instruction *&Temp) {
752 if (Opc != Instruction::BitCast)
756 Type *SrcTy = V->getType();
// Only casts whose source and destination address spaces differ are
// rewritten.
757 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
758 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
759 LLVMContext &Context = V->getContext();
761 // We have no information about target data layout, so we assume that
762 // the maximum pointer size is 64bit.
763 Type *MidTy = Type::getInt64Ty(Context);
// Both casts are created without an insertion point argument.
764 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
766 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression counterpart of the bitcast upgrade: for a bitcast
// between pointer (or pointer-vector) types in different address spaces it
// returns an inttoptr of a ptrtoint of C, going through i64.
//
// Opc is the cast opcode, C the source constant and DestTy the target type.
// NOTE(review): what is returned when no rewrite applies, and the final
// argument of getIntToPtr, are on lines not present in this listing.
772 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
773 if (Opc != Instruction::BitCast)
776 Type *SrcTy = C->getType();
// Only casts whose source and destination address spaces differ are
// rewritten.
777 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
778 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
779 LLVMContext &Context = C->getContext();
781 // We have no information about target data layout, so we assume that
782 // the maximum pointer size is 64bit.
783 Type *MidTy = Type::getInt64Ty(Context);
785 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
792 /// Check the debug info version number, if it is out-dated, drop the debug
793 /// info. Return true if module is modified.
///
/// The version read from M is compared with DEBUG_METADATA_VERSION. On a
/// mismatch the debug info is stripped and a
/// DiagnosticInfoDebugMetadataVersion carrying the found version is reported
/// through the module's context.
/// NOTE(review): the return statements and any condition around the
/// diagnostic are on lines not present in this listing.
794 bool llvm::UpgradeDebugInfo(Module &M) {
795 unsigned Version = getDebugMetadataVersionFromModule(M);
796 if (Version == DEBUG_METADATA_VERSION)
799 bool RetCode = StripDebugInfo(M);
801 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
802 M.getContext().diagnose(DiagVersion);
// Rewrites an old "llvm.vectorizer.*" metadata string in place.
// The exact string "llvm.vectorizer.unroll" becomes
// "llvm.loop.interleave.count". Any other string that begins with
// "llvm.vectorizer." has that prefix replaced by "llvm.loop.vectorize.".
// Strings without the prefix are left as they are.
807 void llvm::UpgradeMDStringConstant(std::string &String) {
808 const std::string OldPrefix = "llvm.vectorizer.";
// The exact match is tested first because it also starts with OldPrefix.
809 if (String == "llvm.vectorizer.unroll") {
810 String = "llvm.loop.interleave.count";
// find(...) == 0 means OldPrefix occurs at the very start of String.
811 } else if (String.find(OldPrefix) == 0) {
812 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");