1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to
12 // current specifications.
14 //===----------------------------------------------------------------------===//
16 #include "llvm/IR/AutoUpgrade.h"
17 #include "llvm/IR/CFG.h"
18 #include "llvm/IR/CallSite.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DIBuilder.h"
21 #include "llvm/IR/DebugInfo.h"
22 #include "llvm/IR/DiagnosticInfo.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Instruction.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/IR/Module.h"
29 #include "llvm/Support/ErrorHandling.h"
33 // Upgrade the declarations of the SSE4.1 functions whose arguments have
34 // changed their type from v4f32 to v2i64.
//
// F is the declaration under inspection and IID is the intrinsic it should map
// to. A declaration is treated as old when its first parameter is a
// <4 x float> vector; it is then renamed with a ".old" suffix and NewFn is set
// to the declaration of IID in F's module.
// NOTE(review): the return statements are not visible in this view; the bool
// result presumably reports whether F was recognised as old -- confirm.
35 static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
37 // Check whether this is an old version of the function, which received
// v4f32 arguments. Only parameter 0 is examined to make that decision.
39 Type *Arg0Type = F->getFunctionType()->getParamType(0);
40 if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
43 // Yes, it's old, replace it with new version.
// NOTE(review): the rename presumably frees the intrinsic's name so that the
// lookup below yields a fresh declaration rather than F itself -- confirm.
44 F->setName(F->getName() + ".old");
45 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
49 // Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
50 // arguments have changed their type from i32 to i8.
//
// F is the declaration under inspection and IID is the intrinsic it should map
// to. When the final parameter of F is an i32, F is renamed with a ".old"
// suffix and NewFn is set to the declaration of IID in F's module.
// NOTE(review): the return statements are not visible in this view; the bool
// result presumably reports whether F was recognised as old -- confirm.
51 static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
53 // Check that the last argument is an i32.
// The index is derived from the parameter count, so F must have at least one
// parameter for it to be valid.
54 Type *LastArgType = F->getFunctionType()->getParamType(
55 F->getFunctionType()->getNumParams() - 1);
56 if (!LastArgType->isIntegerTy(32))
59 // Move this function aside and map down.
60 F->setName(F->getName() + ".old");
61 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65 // Upgrade the declarations of AVX-512 cmp intrinsic functions whose 8-bit
66 // immediates have changed their type from i32 to i8.
//
// F is the declaration under inspection and IID is the intrinsic it should map
// to. When the parameter at index 2 of F is an i32, F is renamed with a ".old"
// suffix and NewFn is set to the declaration of IID in F's module.
// NOTE(review): the return statements are not visible in this view; the bool
// result presumably reports whether F was recognised as old -- confirm.
67 static bool UpgradeAVX512CmpIntrinsic(Function *F, Intrinsic::ID IID,
69 // Check that the parameter at index 2 is an i32.
// Despite the variable's name, a fixed index is used here rather than the
// position of the final parameter.
70 Type *LastArgType = F->getFunctionType()->getParamType(2);
71 if (!LastArgType->isIntegerTy(32))
74 // Move this function aside and map down.
75 F->setName(F->getName() + ".old");
76 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
// Examines the name of F and, for the outdated intrinsic names matched below,
// renames F and/or sets NewFn as the matching branch dictates.
// NOTE(review): the return statements and part of the dispatch around these
// checks are not visible in this view; the bool result presumably tells the
// caller whether an upgrade was recognised -- confirm against the full body.
80 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
81 assert(F && "Illegal to upgrade a non-existent Function.");
83 // Quickly eliminate it, if it's not a candidate.
84 StringRef Name = F->getName();
85 if (Name.size() <= 8 || !Name.startswith("llvm."))
87 Name = Name.substr(5); // Strip off "llvm."
// From here on Name no longer carries the "llvm." prefix, so every comparison
// below uses the shortened spelling.
// ARM NEON vclz is mapped onto llvm.ctlz; the pieces visible here pair the
// type of F's first argument with an i1 type.
92 if (Name.startswith("arm.neon.vclz")) {
94 F->arg_begin()->getType(),
95 Type::getInt1Ty(F->getContext())
97 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
98 // the end of the name. Change name from llvm.arm.neon.vclz.* to
100 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
// "arm.neon.vclz." is 14 characters long, so substr(14) keeps only what
// follows that prefix.
101 NewFn = Function::Create(fType, F->getLinkage(),
102 "llvm.ctlz." + Name.substr(14), F->getParent());
// ARM NEON vcnt is mapped onto ctpop, overloaded on the first argument's type.
105 if (Name.startswith("arm.neon.vcnt")) {
106 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
107 F->arg_begin()->getType());
// Only one-argument ctlz/cttz declarations are upgraded. Name was stripped of
// "llvm." above, so the renamed old function does not keep that prefix.
113 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
114 F->setName(Name + ".old");
115 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
116 F->arg_begin()->getType());
119 if (Name.startswith("cttz.") && F->arg_size() == 1) {
120 F->setName(Name + ".old");
121 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
122 F->arg_begin()->getType());
129 // We only need to change the name to match the mangling including the
// The expected name is computed from the return type and the first argument's
// type; the declaration is replaced only when the current name differs.
131 if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
132 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
133 if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
134 F->setName(Name + ".old");
135 NewFn = Intrinsic::getDeclaration(F->getParent(),
136 Intrinsic::objectsize, Tys);
// NOTE(review): the body guarded by the following condition is not visible in
// this view. The names listed here are the ones UpgradeIntrinsicCall
// recognises by name when it rewrites calls.
143 if (Name.startswith("x86.sse2.pcmpeq.") ||
144 Name.startswith("x86.sse2.pcmpgt.") ||
145 Name.startswith("x86.avx2.pcmpeq.") ||
146 Name.startswith("x86.avx2.pcmpgt.") ||
147 Name.startswith("x86.avx.vpermil.") ||
148 Name == "x86.avx.vinsertf128.pd.256" ||
149 Name == "x86.avx.vinsertf128.ps.256" ||
150 Name == "x86.avx.vinsertf128.si.256" ||
151 Name == "x86.avx2.vinserti128" ||
152 Name == "x86.avx.vextractf128.pd.256" ||
153 Name == "x86.avx.vextractf128.ps.256" ||
154 Name == "x86.avx.vextractf128.si.256" ||
155 Name == "x86.avx2.vextracti128" ||
156 Name == "x86.avx.movnt.dq.256" ||
157 Name == "x86.avx.movnt.pd.256" ||
158 Name == "x86.avx.movnt.ps.256" ||
159 Name == "x86.sse42.crc32.64.8" ||
160 Name == "x86.avx.vbroadcast.ss" ||
161 Name == "x86.avx.vbroadcast.ss.256" ||
162 Name == "x86.avx.vbroadcast.sd.256" ||
163 Name == "x86.sse2.psll.dq" ||
164 Name == "x86.sse2.psrl.dq" ||
165 Name == "x86.avx2.psll.dq" ||
166 Name == "x86.avx2.psrl.dq" ||
167 Name == "x86.sse2.psll.dq.bs" ||
168 Name == "x86.sse2.psrl.dq.bs" ||
169 Name == "x86.avx2.psll.dq.bs" ||
170 Name == "x86.avx2.psrl.dq.bs" ||
171 Name == "x86.sse41.pblendw" ||
172 Name == "x86.sse41.blendpd" ||
173 Name == "x86.sse41.blendps" ||
174 Name == "x86.avx.blend.pd.256" ||
175 Name == "x86.avx.blend.ps.256" ||
176 Name == "x86.avx2.pblendw" ||
177 Name == "x86.avx2.pblendd.128" ||
178 Name == "x86.avx2.pblendd.256" ||
179 Name == "x86.avx2.vbroadcasti128" ||
180 (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
184 // SSE4.1 ptest functions may have an old signature.
185 if (Name.startswith("x86.sse41.ptest")) {
186 if (Name == "x86.sse41.ptestc")
187 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
188 if (Name == "x86.sse41.ptestz")
189 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
190 if (Name == "x86.sse41.ptestnzc")
191 return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
193 // Several blend and other instructions with masks used the wrong number of
// Each of the following names is delegated to the helper that tests the
// final parameter for i32.
195 if (Name == "x86.sse41.insertps")
196 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
198 if (Name == "x86.sse41.dppd")
199 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
201 if (Name == "x86.sse41.dpps")
202 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
204 if (Name == "x86.sse41.mpsadbw")
205 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
207 if (Name == "x86.avx.dp.ps.256")
208 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
210 if (Name == "x86.avx2.mpsadbw")
211 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
// AVX-512 masked compares: each name is delegated to the helper that tests
// the parameter at index 2 for i32.
214 if (Name == "x86.avx512.mask.cmp.ps.512")
215 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
217 if (Name == "x86.avx512.mask.cmp.pd.512")
218 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
221 if (Name == "x86.avx512.mask.cmp.b.512")
222 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
224 if (Name == "x86.avx512.mask.cmp.w.512")
225 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_512,
227 if (Name == "x86.avx512.mask.cmp.d.512")
228 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_512,
230 if (Name == "x86.avx512.mask.cmp.q.512")
231 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_512,
233 if (Name == "x86.avx512.mask.ucmp.b.512")
234 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_512,
236 if (Name == "x86.avx512.mask.ucmp.w.512")
237 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_512,
239 if (Name == "x86.avx512.mask.ucmp.d.512")
240 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_512,
242 if (Name == "x86.avx512.mask.ucmp.q.512")
243 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_512,
246 if (Name == "x86.avx512.mask.cmp.b.256")
247 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_256,
249 if (Name == "x86.avx512.mask.cmp.w.256")
250 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_256,
252 if (Name == "x86.avx512.mask.cmp.d.256")
253 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_256,
255 if (Name == "x86.avx512.mask.cmp.q.256")
256 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_256,
258 if (Name == "x86.avx512.mask.ucmp.b.256")
259 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_256,
261 if (Name == "x86.avx512.mask.ucmp.w.256")
262 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_256,
264 if (Name == "x86.avx512.mask.ucmp.d.256")
265 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_256,
267 if (Name == "x86.avx512.mask.ucmp.q.256")
268 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_256,
271 if (Name == "x86.avx512.mask.cmp.b.128")
272 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_128,
274 if (Name == "x86.avx512.mask.cmp.w.128")
275 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_128,
277 if (Name == "x86.avx512.mask.cmp.d.128")
278 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_128,
280 if (Name == "x86.avx512.mask.cmp.q.128")
281 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_128,
283 if (Name == "x86.avx512.mask.ucmp.b.128")
284 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_128,
286 if (Name == "x86.avx512.mask.ucmp.w.128")
287 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_128,
289 if (Name == "x86.avx512.mask.ucmp.d.128")
290 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_128,
292 if (Name == "x86.avx512.mask.ucmp.q.128")
293 return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_128,
296 // frcz.ss/sd may need to have an argument dropped
// Only the two-argument declarations are replaced. Name was stripped of
// "llvm." above, so the renamed old function does not keep that prefix.
297 if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
298 F->setName(Name + ".old");
299 NewFn = Intrinsic::getDeclaration(F->getParent(),
300 Intrinsic::x86_xop_vfrcz_ss);
303 if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
304 F->setName(Name + ".old");
305 NewFn = Intrinsic::getDeclaration(F->getParent(),
306 Intrinsic::x86_xop_vfrcz_sd);
309 // Fix the FMA4 intrinsics to remove the 4
// "x86.fma4" is 8 characters long, so substr(8) keeps everything from the
// '.' that follows it; F is renamed in place rather than replaced.
310 if (Name.startswith("x86.fma4.")) {
311 F->setName("llvm.x86.fma" + Name.substr(8));
319 // This may not belong here. This function is effectively being overloaded
320 // to both detect an intrinsic which needs upgrading, and to provide the
321 // upgraded form of the intrinsic. We should perhaps have two separate
322 // functions for this.
// Entry point that runs UpgradeIntrinsicFunction1 on F (which may set NewFn)
// and then refreshes F's attributes when F has a non-zero intrinsic ID.
// NOTE(review): the return statement is not visible in this view; presumably
// Upgraded is returned -- confirm.
326 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
328 bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
330 // Upgrade intrinsic attributes. This does not change the function.
// A zero ID fails the condition below, so attributes are left untouched in
// that case.
333 if (unsigned id = F->getIntrinsicID())
334 F->setAttributes(Intrinsic::getAttributes(F->getContext(),
// Hook for upgrading a global variable. GV is not examined by the code
// visible here.
// NOTE(review): the return statement is not visible in this view.
339 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
340 // Nothing to do yet.
344 // Handles upgrading SSE2 and AVX2 PSLLDQ intrinsics by converting them
// into a byte shuffle between a zero vector and the operand.
//
// Builder emits the new instructions, C supplies the types, Op is the value
// being shifted, NumLanes is the number of 16-byte lanes in Op, and Shift is
// the amount subtracted from each byte index below.
// Returns the result bitcast to a vector of 2*NumLanes 64-bit integers.
346 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
347 Value *Op, unsigned NumLanes,
349 // Each lane is 16 bytes.
350 unsigned NumElts = NumLanes * 16;
352 // Bitcast from a 64-bit element type to a byte element type.
353 Op = Builder.CreateBitCast(Op,
354 VectorType::get(Type::getInt8Ty(C), NumElts),
356 // We'll be shuffling in zeroes.
357 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
359 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
360 // we'll just return the zero vector.
362 SmallVector<Constant*, 32> Idxs;
363 // 256-bit version is split into two 16-byte lanes.
// The shuffle operands are (zeros, Op): indices below NumElts select zero
// bytes and indices from NumElts upwards select bytes of Op.
// NOTE(review): the condition guarding the "switch operand" adjustment is not
// visible in this view.
364 for (unsigned l = 0; l != NumElts; l += 16)
365 for (unsigned i = 0; i != 16; ++i) {
366 unsigned Idx = NumElts + i - Shift;
368 Idx -= NumElts - 16; // end of lane, switch operand.
369 Idxs.push_back(Builder.getInt32(Idx + l));
372 Res = Builder.CreateShuffleVector(Res, Op, ConstantVector::get(Idxs));
375 // Bitcast back to a 64-bit element type.
376 return Builder.CreateBitCast(Res,
377 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
381 // Handles upgrading SSE2 and AVX2 PSRLDQ intrinsics by converting them
// into a byte shuffle between the operand and a zero vector.
//
// Builder emits the new instructions, C supplies the types, Op is the value
// being shifted, NumLanes is the number of 16-byte lanes in Op, and Shift is
// the amount added to each byte index below.
// Returns the result bitcast to a vector of 2*NumLanes 64-bit integers.
383 static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, LLVMContext &C,
384 Value *Op, unsigned NumLanes,
386 // Each lane is 16 bytes.
387 unsigned NumElts = NumLanes * 16;
389 // Bitcast from a 64-bit element type to a byte element type.
390 Op = Builder.CreateBitCast(Op,
391 VectorType::get(Type::getInt8Ty(C), NumElts),
393 // We'll be shuffling in zeroes.
394 Value *Res = ConstantVector::getSplat(NumElts, Builder.getInt8(0));
396 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
397 // we'll just return the zero vector.
399 SmallVector<Constant*, 32> Idxs;
400 // 256-bit version is split into two 16-byte lanes.
// The shuffle operands are (Op, zeros): indices below NumElts select bytes of
// Op and indices from NumElts upwards select zero bytes.
// NOTE(review): the condition guarding the "switch operand" adjustment is not
// visible in this view.
401 for (unsigned l = 0; l != NumElts; l += 16)
402 for (unsigned i = 0; i != 16; ++i) {
403 unsigned Idx = i + Shift;
405 Idx += NumElts - 16; // end of lane, switch operand.
406 Idxs.push_back(Builder.getInt32(Idx + l));
409 Res = Builder.CreateShuffleVector(Op, Res, ConstantVector::get(Idxs));
412 // Bitcast back to a 64-bit element type.
413 return Builder.CreateBitCast(Res,
414 VectorType::get(Type::getInt64Ty(C), 2*NumLanes),
418 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
419 // upgraded intrinsic. All argument and return casting must be provided in
420 // order to seamlessly integrate with existing context.
//
// CI is the call to rewrite and NewFn the replacement declaration. The first
// half of the body dispatches on the callee's name and builds a replacement
// value Rep out of ordinary instructions or other intrinsics; the second half
// switches on NewFn's intrinsic ID and emits a call to NewFn. In both halves
// the uses of CI are redirected to the replacement and CI is erased.
// NOTE(review): the condition separating the two halves, several assignments
// and the return/break statements are not visible in this view.
421 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
422 Function *F = CI->getCalledFunction();
423 LLVMContext &C = CI->getContext();
424 IRBuilder<> Builder(C);
// New instructions are inserted immediately before the call being replaced.
425 Builder.SetInsertPoint(CI->getParent(), CI);
427 assert(F && "Intrinsic call is not direct?");
430 // Get the Function's name.
// Unlike in UpgradeIntrinsicFunction1, the "llvm." prefix is kept here.
431 StringRef Name = F->getName();
434 // Upgrade packed integer vector compares intrinsics to compare instructions
435 if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
436 Name.startswith("llvm.x86.avx2.pcmpeq.")) {
437 Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
439 // need to sign extend since icmp returns vector of i1
440 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
441 } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
442 Name.startswith("llvm.x86.avx2.pcmpgt.")) {
443 Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
445 // need to sign extend since icmp returns vector of i1
446 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
447 } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
448 Name == "llvm.x86.avx.movnt.ps.256" ||
449 Name == "llvm.x86.avx.movnt.pd.256") {
// These calls become a plain store tagged with "nontemporal" metadata.
// NOTE(review): this Builder shadows the function-level one and is set up
// with the same context and insertion point, so it looks redundant.
450 IRBuilder<> Builder(C);
451 Builder.SetInsertPoint(CI->getParent(), CI);
453 Module *M = F->getParent();
454 SmallVector<Metadata *, 1> Elts;
456 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
457 MDNode *Node = MDNode::get(C, Elts);
459 Value *Arg0 = CI->getArgOperand(0);
460 Value *Arg1 = CI->getArgOperand(1);
462 // Convert the type of the pointer to a pointer to the stored type.
463 Value *BC = Builder.CreateBitCast(Arg0,
464 PointerType::getUnqual(Arg1->getType()),
466 StoreInst *SI = Builder.CreateStore(Arg1, BC);
467 SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// NOTE(review): the intrinsic names say 256-bit; confirm that an alignment of
// 16 (rather than 32) is what is intended for these stores.
468 SI->setAlignment(16);
// The call is erased directly here rather than through the shared
// replace-and-erase step used for the branches that produce Rep.
471 CI->eraseFromParent();
473 } else if (Name.startswith("llvm.x86.xop.vpcom")) {
// The two-letter unsigned suffixes are tested before the one-letter ones
// because a name ending in e.g. "ub" also ends in "b".
475 if (Name.endswith("ub"))
476 intID = Intrinsic::x86_xop_vpcomub;
477 else if (Name.endswith("uw"))
478 intID = Intrinsic::x86_xop_vpcomuw;
479 else if (Name.endswith("ud"))
480 intID = Intrinsic::x86_xop_vpcomud;
481 else if (Name.endswith("uq"))
482 intID = Intrinsic::x86_xop_vpcomuq;
483 else if (Name.endswith("b"))
484 intID = Intrinsic::x86_xop_vpcomb;
485 else if (Name.endswith("w"))
486 intID = Intrinsic::x86_xop_vpcomw;
487 else if (Name.endswith("d"))
488 intID = Intrinsic::x86_xop_vpcomd;
489 else if (Name.endswith("q"))
490 intID = Intrinsic::x86_xop_vpcomq;
492 llvm_unreachable("Unknown suffix");
494 Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
// What remains of Name starts with the condition mnemonic, which selects the
// immediate passed to the replacement intrinsic.
// NOTE(review): the Imm assignments for each mnemonic are not visible here.
496 if (Name.startswith("lt"))
498 else if (Name.startswith("le"))
500 else if (Name.startswith("gt"))
502 else if (Name.startswith("ge"))
504 else if (Name.startswith("eq"))
506 else if (Name.startswith("ne"))
508 else if (Name.startswith("false"))
510 else if (Name.startswith("true"))
513 llvm_unreachable("Unknown condition");
515 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
516 Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
517 CI->getArgOperand(1), Builder.getInt8(Imm));
518 } else if (Name == "llvm.x86.sse42.crc32.64.8") {
// Expressed through the 32-bit crc32 intrinsic: truncate the first operand to
// i32, call it, then zero-extend the result to the call's type.
519 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
520 Intrinsic::x86_sse42_crc32_32_8);
521 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
522 Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
523 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
524 } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
525 // Replace broadcasts with a series of insertelements.
526 Type *VecTy = CI->getType();
527 Type *EltTy = VecTy->getVectorElementType();
528 unsigned EltNum = VecTy->getVectorNumElements();
529 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
530 EltTy->getPointerTo());
// The element is loaded once and the same value is inserted into every lane.
531 Value *Load = Builder.CreateLoad(Cast);
532 Type *I32Ty = Type::getInt32Ty(C);
533 Rep = UndefValue::get(VecTy);
534 for (unsigned I = 0; I < EltNum; ++I)
535 Rep = Builder.CreateInsertElement(Rep, Load,
536 ConstantInt::get(I32Ty, I));
537 } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
538 // Replace vbroadcasts with a vector shuffle.
539 Value *Op = Builder.CreatePointerCast(
540 CI->getArgOperand(0),
541 PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2)));
542 Value *Load = Builder.CreateLoad(Op);
// Indices {0, 1, 0, 1} repeat the two loaded 64-bit elements.
543 const int Idxs[4] = { 0, 1, 0, 1 };
544 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
546 } else if (Name == "llvm.x86.sse2.psll.dq") {
547 // 128-bit shift left specified in bits.
548 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
549 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
550 Shift / 8); // Shift is in bits.
551 } else if (Name == "llvm.x86.sse2.psrl.dq") {
552 // 128-bit shift right specified in bits.
553 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
554 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
555 Shift / 8); // Shift is in bits.
556 } else if (Name == "llvm.x86.avx2.psll.dq") {
557 // 256-bit shift left specified in bits.
558 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
559 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
560 Shift / 8); // Shift is in bits.
561 } else if (Name == "llvm.x86.avx2.psrl.dq") {
562 // 256-bit shift right specified in bits.
563 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
564 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
565 Shift / 8); // Shift is in bits.
566 } else if (Name == "llvm.x86.sse2.psll.dq.bs") {
567 // 128-bit shift left specified in bytes.
568 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
569 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
571 } else if (Name == "llvm.x86.sse2.psrl.dq.bs") {
572 // 128-bit shift right specified in bytes.
573 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
574 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 1,
576 } else if (Name == "llvm.x86.avx2.psll.dq.bs") {
577 // 256-bit shift left specified in bytes.
578 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
579 Rep = UpgradeX86PSLLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
581 } else if (Name == "llvm.x86.avx2.psrl.dq.bs") {
582 // 256-bit shift right specified in bytes.
583 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
584 Rep = UpgradeX86PSRLDQIntrinsics(Builder, C, CI->getArgOperand(0), 2,
586 } else if (Name == "llvm.x86.sse41.pblendw" ||
587 Name == "llvm.x86.sse41.blendpd" ||
588 Name == "llvm.x86.sse41.blendps" ||
589 Name == "llvm.x86.avx.blend.pd.256" ||
590 Name == "llvm.x86.avx.blend.ps.256" ||
591 Name == "llvm.x86.avx2.pblendw" ||
592 Name == "llvm.x86.avx2.pblendd.128" ||
593 Name == "llvm.x86.avx2.pblendd.256") {
// Blends become a two-operand shuffle: a set immediate bit takes the element
// from Op1, a clear bit takes it from Op0.
594 Value *Op0 = CI->getArgOperand(0);
595 Value *Op1 = CI->getArgOperand(1);
596 unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
597 VectorType *VecTy = cast<VectorType>(CI->getType());
598 unsigned NumElts = VecTy->getNumElements();
600 SmallVector<Constant*, 16> Idxs;
// i%8 makes the immediate's bits repeat every eight elements.
601 for (unsigned i = 0; i != NumElts; ++i) {
602 unsigned Idx = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
603 Idxs.push_back(Builder.getInt32(Idx));
606 Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
607 } else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
608 Name == "llvm.x86.avx.vinsertf128.ps.256" ||
609 Name == "llvm.x86.avx.vinsertf128.si.256" ||
610 Name == "llvm.x86.avx2.vinserti128") {
611 Value *Op0 = CI->getArgOperand(0);
612 Value *Op1 = CI->getArgOperand(1);
613 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
614 VectorType *VecTy = cast<VectorType>(CI->getType());
615 unsigned NumElts = VecTy->getNumElements();
617 // Mask off the high bits of the immediate value; hardware ignores those.
// NOTE(review): the masking statement itself is not visible in this view.
620 // Extend the second operand into a vector that is twice as big.
621 Value *UndefV = UndefValue::get(Op1->getType());
622 SmallVector<Constant*, 8> Idxs;
623 for (unsigned i = 0; i != NumElts; ++i) {
624 Idxs.push_back(Builder.getInt32(i));
626 Rep = Builder.CreateShuffleVector(Op1, UndefV, ConstantVector::get(Idxs));
628 // Insert the second operand into the first operand.
630 // Note that there is no guarantee that instruction lowering will actually
631 // produce a vinsertf128 instruction for the created shuffles. In
632 // particular, the 0 immediate case involves no lane changes, so it can
633 // be handled as a blend.
635 // Example of shuffle mask for 32-bit elements:
636 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
637 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
639 SmallVector<Constant*, 8> Idxs2;
640 // The low half of the result is either the low half of the 1st operand
641 // or the low half of the 2nd operand (the inserted vector).
642 for (unsigned i = 0; i != NumElts / 2; ++i) {
643 unsigned Idx = Imm ? i : (i + NumElts);
644 Idxs2.push_back(Builder.getInt32(Idx));
646 // The high half of the result is either the low half of the 2nd operand
647 // (the inserted vector) or the high half of the 1st operand.
648 for (unsigned i = NumElts / 2; i != NumElts; ++i) {
649 unsigned Idx = Imm ? (i + NumElts / 2) : i;
650 Idxs2.push_back(Builder.getInt32(Idx));
652 Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
653 } else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
654 Name == "llvm.x86.avx.vextractf128.ps.256" ||
655 Name == "llvm.x86.avx.vextractf128.si.256" ||
656 Name == "llvm.x86.avx2.vextracti128") {
657 Value *Op0 = CI->getArgOperand(0);
658 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
// NumElts is taken from the call's result type, i.e. the extracted vector.
659 VectorType *VecTy = cast<VectorType>(CI->getType());
660 unsigned NumElts = VecTy->getNumElements();
662 // Mask off the high bits of the immediate value; hardware ignores those.
// NOTE(review): the masking statement itself is not visible in this view.
665 // Get indexes for either the high half or low half of the input vector.
666 SmallVector<Constant*, 4> Idxs(NumElts);
667 for (unsigned i = 0; i != NumElts; ++i) {
668 unsigned Idx = Imm ? (i + NumElts) : i;
669 Idxs[i] = Builder.getInt32(Idx);
672 Value *UndefV = UndefValue::get(Op0->getType());
673 Rep = Builder.CreateShuffleVector(Op0, UndefV, ConstantVector::get(Idxs));
// Remaining name-based case: the vpermil variants, told apart by four flags.
// NOTE(review): the flag assignments and the conditions selecting each index
// loop below are not visible in this view.
675 bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
676 if (Name == "llvm.x86.avx.vpermil.pd.256")
678 else if (Name == "llvm.x86.avx.vpermil.pd")
680 else if (Name == "llvm.x86.avx.vpermil.ps.256")
682 else if (Name == "llvm.x86.avx.vpermil.ps")
685 if (PD256 || PD128 || PS256 || PS128) {
686 Value *Op0 = CI->getArgOperand(0);
687 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
688 SmallVector<Constant*, 8> Idxs;
// Two indices, one immediate bit each.
691 for (unsigned i = 0; i != 2; ++i)
692 Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
// Four indices in two groups of two; each group has its own pair of
// immediate bits and is offset by its group base l.
694 for (unsigned l = 0; l != 4; l+=2)
695 for (unsigned i = 0; i != 2; ++i)
696 Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
// Four indices, two immediate bits each.
698 for (unsigned i = 0; i != 4; ++i)
699 Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
// Eight indices in two groups of four; both groups reuse the same immediate
// bits and differ only by the group base l.
701 for (unsigned l = 0; l != 8; l+=4)
702 for (unsigned i = 0; i != 4; ++i)
703 Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
705 llvm_unreachable("Unexpected function");
707 Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
709 llvm_unreachable("Unknown function for CallInst upgrade.");
// Shared tail of the name-based half: redirect users to Rep and drop the call.
713 CI->replaceAllUsesWith(Rep);
714 CI->eraseFromParent();
// Second half: the call is re-emitted as a call to NewFn. The old call's name
// is saved so the replacement can reuse it.
718 std::string Name = CI->getName();
720 CI->setName(Name + ".old");
722 switch (NewFn->getIntrinsicID()) {
724 llvm_unreachable("Unknown function for CallInst upgrade.");
726 case Intrinsic::ctlz:
727 case Intrinsic::cttz:
728 assert(CI->getNumArgOperands() == 1 &&
729 "Mismatch between function args and call args");
// The replacement call passes a constant false as an added second argument.
730 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
731 Builder.getFalse(), Name));
732 CI->eraseFromParent();
735 case Intrinsic::objectsize:
736 CI->replaceAllUsesWith(Builder.CreateCall2(NewFn,
737 CI->getArgOperand(0),
738 CI->getArgOperand(1),
740 CI->eraseFromParent();
743 case Intrinsic::ctpop: {
744 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
745 CI->eraseFromParent();
749 case Intrinsic::x86_xop_vfrcz_ss:
750 case Intrinsic::x86_xop_vfrcz_sd:
// Only the old call's second operand is forwarded to the new call.
751 CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
753 CI->eraseFromParent();
756 case Intrinsic::x86_sse41_ptestc:
757 case Intrinsic::x86_sse41_ptestz:
758 case Intrinsic::x86_sse41_ptestnzc: {
759 // The arguments for these intrinsics used to be v4f32, and changed
760 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
761 // So, the only thing required is a bitcast for both arguments.
762 // First, check the arguments have the old type.
763 Value *Arg0 = CI->getArgOperand(0);
764 if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
767 // Old intrinsic, add bitcasts
768 Value *Arg1 = CI->getArgOperand(1);
771 Builder.CreateBitCast(Arg0,
772 VectorType::get(Type::getInt64Ty(C), 2),
775 Builder.CreateBitCast(Arg1,
776 VectorType::get(Type::getInt64Ty(C), 2),
779 CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
780 CI->replaceAllUsesWith(NewCall);
781 CI->eraseFromParent();
785 case Intrinsic::x86_sse41_insertps:
786 case Intrinsic::x86_sse41_dppd:
787 case Intrinsic::x86_sse41_dpps:
788 case Intrinsic::x86_sse41_mpsadbw:
789 case Intrinsic::x86_avx_dp_ps_256:
790 case Intrinsic::x86_avx2_mpsadbw: {
791 // Need to truncate the last argument from i32 to i8 -- this argument models
792 // an inherently 8-bit immediate operand to these x86 instructions.
793 SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
794 CI->arg_operands().end());
796 // Replace the last argument with a trunc.
797 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
799 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
800 CI->replaceAllUsesWith(NewCall);
801 CI->eraseFromParent();
804 case Intrinsic::x86_avx512_mask_cmp_ps_512:
805 case Intrinsic::x86_avx512_mask_cmp_pd_512: {
806 // Need to truncate the operand at index 2 from i32 to i8 -- this argument
807 // models an inherently 8-bit immediate operand to these x86 instructions.
808 SmallVector<Value *, 5> Args(CI->arg_operands().begin(),
809 CI->arg_operands().end());
811 // Replace the operand at index 2 with a trunc.
812 Args[2] = Builder.CreateTrunc(Args[2], Type::getInt8Ty(C), "trunc");
814 CallInst *NewCall = Builder.CreateCall(NewFn, Args);
815 CI->replaceAllUsesWith(NewCall);
816 CI->eraseFromParent();
822 // This tests each Function to determine if it needs upgrading. When we find
823 // one we are interested in, we then upgrade all calls to reflect the new
// form, and finally erase the old function from its module.
825 void llvm::UpgradeCallsToIntrinsic(Function* F) {
826 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
828 // Upgrade the function and check if it is a totally new function.
830 if (UpgradeIntrinsicFunction(F, NewFn)) {
832 // Replace all uses to the old function with the new one if necessary.
// The iterator is advanced inside the dyn_cast expression, i.e. before the
// call is upgraded; UpgradeIntrinsicCall erases the call it is handed.
// Users that are not CallInsts are skipped.
833 for (Value::user_iterator UI = F->user_begin(), UE = F->user_end();
835 if (CallInst *CI = dyn_cast<CallInst>(*UI++))
836 UpgradeIntrinsicCall(CI, NewFn);
838 // Remove old function, no longer used, from the module.
839 F->eraseFromParent();
// Rewrites the TBAA tag attached to I. A tag that already has an MDNode as
// operand 0 and at least three operands is recognised by the first check; the
// remaining code builds a new tag node and attaches it to I.
// NOTE(review): the statement following the first check and the "else" that
// separates the two rebuild paths are not visible in this view.
844 void llvm::UpgradeInstWithTBAATag(Instruction *I) {
845 MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
846 assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
847 // Check if the tag uses struct-path aware TBAA format.
848 if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
// Three-operand tag: the first two operands form the scalar type node and the
// new tag is built around it.
851 if (MD->getNumOperands() == 3) {
852 Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
853 MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
854 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
855 Metadata *Elts2[] = {ScalarType, ScalarType,
856 ConstantAsMetadata::get(Constant::getNullValue(
857 Type::getInt64Ty(I->getContext()))),
859 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
861 // Create a MDNode <MD, MD, offset 0>
862 Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
863 Type::getInt64Ty(I->getContext())))};
864 I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
// Replaces a bitcast between pointers (or pointer vectors) in different
// address spaces with a ptrtoint followed by an inttoptr. Opc is the cast
// opcode, V the value being cast and DestTy the destination type. Temp
// receives the intermediate ptrtoint instruction and the inttoptr instruction
// is returned. Neither instruction is given an insertion position here.
// NOTE(review): the early return for non-bitcast opcodes, any initialisation
// of Temp and the fall-through return value are not visible in this view.
868 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
869 Instruction *&Temp) {
870 if (Opc != Instruction::BitCast)
874 Type *SrcTy = V->getType();
875 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
876 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
877 LLVMContext &Context = V->getContext();
879 // We have no information about target data layout, so we assume that
880 // the maximum pointer size is 64bit.
881 Type *MidTy = Type::getInt64Ty(Context);
882 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
884 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
// Constant-expression counterpart of the instruction upgrade: a bitcast of
// constant C between pointers (or pointer vectors) in different address
// spaces is replaced by inttoptr(ptrtoint(C)) through a 64-bit integer.
// NOTE(review): the early return for non-bitcast opcodes and the fall-through
// return value are not visible in this view.
890 Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
891 if (Opc != Instruction::BitCast)
894 Type *SrcTy = C->getType();
895 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
896 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
897 LLVMContext &Context = C->getContext();
899 // We have no information about target data layout, so we assume that
900 // the maximum pointer size is 64bit.
901 Type *MidTy = Type::getInt64Ty(Context);
903 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
910 /// Check the debug info version number, if it is out-dated, drop the debug
911 /// info. Return true if module is modified.
///
/// The module's version is compared against DEBUG_METADATA_VERSION. On a
/// mismatch the debug info is stripped and a
/// DiagnosticInfoDebugMetadataVersion carrying the found version is reported
/// through the module's context.
/// NOTE(review): the return statements and any condition guarding the
/// diagnostic are not visible in this view.
912 bool llvm::UpgradeDebugInfo(Module &M) {
913 unsigned Version = getDebugMetadataVersionFromModule(M);
914 if (Version == DEBUG_METADATA_VERSION)
917 bool RetCode = StripDebugInfo(M);
919 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
920 M.getContext().diagnose(DiagVersion);
// Rewrites String in place when it uses the old "llvm.vectorizer." naming:
// exactly "llvm.vectorizer.unroll" becomes "llvm.loop.interleave.count", and
// any other string starting with "llvm.vectorizer." has that prefix replaced
// by "llvm.loop.vectorize.". Other strings are left unchanged.
925 void llvm::UpgradeMDStringConstant(std::string &String) {
926 const std::string OldPrefix = "llvm.vectorizer.";
// The exact match is tested first so that "unroll" gets its dedicated
// replacement instead of the generic prefix substitution.
927 if (String == "llvm.vectorizer.unroll") {
928 String = "llvm.loop.interleave.count";
929 } else if (String.find(OldPrefix) == 0) {
930 String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");