Inline the checks for mutually exclusive attributes since they're used in only one...
[oota-llvm.git] / lib / VMCore / AutoUpgrade.cpp
index 7b4a8f7ddf41a0d1b20d0e79028be23a9594139d..094ca755132c006e83977a43dbd4aff467ce33df 100644 (file)
 #include "llvm/AutoUpgrade.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
 #include "llvm/Instruction.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/IRBuilder.h"
 #include <cstring>
 using namespace llvm;
 
+// Upgrade the declaration of an SSE4.1 function whose arguments changed type
+// from v4f32 to v2i64. Returns true, with NewFn set, only for the old form.
+static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
+                                 Function *&NewFn) {
+  // Only parameter 0 is examined: a 4 x float vector marks the old version.
+  // For any other type, return false without touching F or NewFn.
+  Type *Arg0Type = F->getFunctionType()->getParamType(0);
+  if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
+    return false;
+
+  // Old signature: rename F with an .old suffix and look up IID's declaration.
+  F->setName(F->getName() + ".old");
+  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+  return true;
+}
 
 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   assert(F && "Illegal to upgrade a non-existent Function.");
@@ -38,112 +50,92 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
     return false;
   Name = Name.substr(5); // Strip off "llvm."
 
-  FunctionType *FTy = F->getFunctionType();
-  Module *M = F->getParent();
-  
   switch (Name[0]) {
   default: break;
-  case 'a':
-    if (Name.startswith("atomic.cmp.swap") ||
-        Name.startswith("atomic.swap") ||
-        Name.startswith("atomic.load.add") ||
-        Name.startswith("atomic.load.sub") ||
-        Name.startswith("atomic.load.and") ||
-        Name.startswith("atomic.load.nand") ||
-        Name.startswith("atomic.load.or") ||
-        Name.startswith("atomic.load.xor") ||
-        Name.startswith("atomic.load.max") ||
-        Name.startswith("atomic.load.min") ||
-        Name.startswith("atomic.load.umax") ||
-        Name.startswith("atomic.load.umin"))
+  case 'a': {
+    if (Name.startswith("arm.neon.vclz")) {
+      Type* args[2] = {
+        F->arg_begin()->getType(), 
+        Type::getInt1Ty(F->getContext())
+      };
+      // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
+      // the end of the name. Change name from llvm.arm.neon.vclz.* to
+      //  llvm.ctlz.*
+      FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
+      NewFn = Function::Create(fType, F->getLinkage(), 
+                               "llvm.ctlz." + Name.substr(14), F->getParent());
       return true;
-  case 'i':
-    //  This upgrades the old llvm.init.trampoline to the new
-    //  llvm.init.trampoline and llvm.adjust.trampoline pair.
-    if (Name == "init.trampoline") {
-      // The new llvm.init.trampoline returns nothing.
-      if (FTy->getReturnType()->isVoidTy())
-        break;
-
-      assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!");
-
-      // Change the name of the old intrinsic so that we can play with its type.
-      std::string NameTmp = F->getName();
-      F->setName("");
-      NewFn = cast<Function>(M->getOrInsertFunction(
-                               NameTmp,
-                               Type::getVoidTy(M->getContext()),
-                               FTy->getParamType(0), FTy->getParamType(1),
-                               FTy->getParamType(2), (Type *)0));
+    }
+    if (Name.startswith("arm.neon.vcnt")) {
+      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
+                                        F->arg_begin()->getType());
       return true;
     }
-  case 'm':
-    if (Name == "memory.barrier")
+    break;
+  }
+  case 'c': {
+    if (Name.startswith("ctlz.") && F->arg_size() == 1) {
+      F->setName(Name + ".old");
+      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+                                        F->arg_begin()->getType());
       return true;
-  case 'p':
-    //  This upgrades the llvm.prefetch intrinsic to accept one more parameter,
-    //  which is a instruction / data cache identifier. The old version only
-    //  implicitly accepted the data version.
-    if (Name == "prefetch") {
-      // Don't do anything if it has the correct number of arguments already
-      if (FTy->getNumParams() == 4)
-        break;
-
-      assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
-      //  We first need to change the name of the old (bad) intrinsic, because
-      //  its type is incorrect, but we cannot overload that name. We
-      //  arbitrarily unique it here allowing us to construct a correctly named
-      //  and typed function below.
-      std::string NameTmp = F->getName();
-      F->setName("");
-      NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
-                                                    FTy->getReturnType(),
-                                                    FTy->getParamType(0),
-                                                    FTy->getParamType(1),
-                                                    FTy->getParamType(2),
-                                                    FTy->getParamType(2),
-                                                    (Type*)0));
+    }
+    if (Name.startswith("cttz.") && F->arg_size() == 1) {
+      F->setName(Name + ".old");
+      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
+                                        F->arg_begin()->getType());
       return true;
     }
-
     break;
+  }
   case 'x': {
-    const char *NewFnName = NULL;
-    // This fixes the poorly named crc32 intrinsics.
-    if (Name == "x86.sse42.crc32.8")
-      NewFnName = "llvm.x86.sse42.crc32.32.8";
-    else if (Name == "x86.sse42.crc32.16")
-      NewFnName = "llvm.x86.sse42.crc32.32.16";
-    else if (Name == "x86.sse42.crc32.32")
-      NewFnName = "llvm.x86.sse42.crc32.32.32";
-    else if (Name == "x86.sse42.crc64.8")
-      NewFnName = "llvm.x86.sse42.crc32.64.8";
-    else if (Name == "x86.sse42.crc64.64")
-      NewFnName = "llvm.x86.sse42.crc32.64.64";
-    
-    if (NewFnName) {
-      F->setName(NewFnName);
-      NewFn = F;
+    if (Name.startswith("x86.sse2.pcmpeq.") ||
+        Name.startswith("x86.sse2.pcmpgt.") ||
+        Name.startswith("x86.avx2.pcmpeq.") ||
+        Name.startswith("x86.avx2.pcmpgt.") ||
+        Name.startswith("x86.avx.vpermil.") ||
+        Name == "x86.avx.movnt.dq.256" ||
+        Name == "x86.avx.movnt.pd.256" ||
+        Name == "x86.avx.movnt.ps.256" ||
+        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
+      NewFn = 0;
       return true;
     }
-
-    // Calls to these instructions are transformed into unaligned loads.
-    if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
-        Name == "x86.sse2.loadu.pd")
+    // SSE4.1 ptest functions may have an old signature.
+    if (Name.startswith("x86.sse41.ptest")) {
+      if (Name == "x86.sse41.ptestc")
+        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
+      if (Name == "x86.sse41.ptestz")
+        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
+      if (Name == "x86.sse41.ptestnzc")
+        return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
+    }
+    // frcz.ss/sd may need to have an argument dropped
+    if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
+      F->setName(Name + ".old");
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::x86_xop_vfrcz_ss);
       return true;
-      
-    // Calls to these instructions are transformed into nontemporal stores.
-    if (Name == "x86.sse.movnt.ps"  || Name == "x86.sse2.movnt.dq" ||
-        Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
+    }
+    if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
+      F->setName(Name + ".old");
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::x86_xop_vfrcz_sd);
       return true;
-
+    }
+    // Fix the FMA4 intrinsics to remove the 4
+    if (Name.startswith("x86.fma4.")) {
+      F->setName("llvm.x86.fma" + Name.substr(8));
+      NewFn = F;
+      return true;
+    }
     break;
   }
   }
 
-  //  This may not belong here. This function is effectively being overloaded 
-  //  to both detect an intrinsic which needs upgrading, and to provide the 
-  //  upgraded form of the intrinsic. We should perhaps have two separate 
+  //  This may not belong here. This function is effectively being overloaded
+  //  to both detect an intrinsic which needs upgrading, and to provide the
+  //  upgraded form of the intrinsic. We should perhaps have two separate
   //  functions for this.
   return false;
 }
@@ -165,43 +157,38 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
   return false;
 }
 
-// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the 
-// upgraded intrinsic. All argument and return casting must be provided in 
+// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
+// upgraded intrinsic. All argument and return casting must be provided in
 // order to seamlessly integrate with existing context.
 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   Function *F = CI->getCalledFunction();
   LLVMContext &C = CI->getContext();
-  ImmutableCallSite CS(CI);
+  IRBuilder<> Builder(C);
+  Builder.SetInsertPoint(CI->getParent(), CI);
 
-  assert(F && "CallInst has no function associated with it.");
+  assert(F && "Intrinsic call is not direct?");
 
   if (!NewFn) {
-    if (F->getName() == "llvm.x86.sse.loadu.ps" ||
-        F->getName() == "llvm.x86.sse2.loadu.dq" ||
-        F->getName() == "llvm.x86.sse2.loadu.pd") {
-      // Convert to a native, unaligned load.
-      Type *VecTy = CI->getType();
-      Type *IntTy = IntegerType::get(C, 128);
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
-                                        PointerType::getUnqual(IntTy),
-                                        "cast");
-      LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
-      LI->setAlignment(1);      // Unaligned load.
-      BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
-
-      // Fix up all the uses with our new load.
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(BC);
-
-      // Remove intrinsic.
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
-               F->getName() == "llvm.x86.sse2.movnt.dq" ||
-               F->getName() == "llvm.x86.sse2.movnt.pd" ||
-               F->getName() == "llvm.x86.sse2.movnt.i") {
+    // Get the Function's name.
+    StringRef Name = F->getName();
+
+    Value *Rep;
+    // Upgrade packed integer vector compare intrinsics to compare instructions
+    if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
+        Name.startswith("llvm.x86.avx2.pcmpeq.")) {
+      Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
+                                 "pcmpeq");
+      // need to sign extend since icmp returns vector of i1
+      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+    } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
+               Name.startswith("llvm.x86.avx2.pcmpgt.")) {
+      Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
+                                  "pcmpgt");
+      // need to sign extend since icmp returns vector of i1
+      Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+    } else if (Name == "llvm.x86.avx.movnt.dq.256" ||
+               Name == "llvm.x86.avx.movnt.ps.256" ||
+               Name == "llvm.x86.avx.movnt.pd.256") {
       IRBuilder<> Builder(C);
       Builder.SetInsertPoint(CI->getParent(), CI);
 
@@ -223,135 +210,160 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
 
       // Remove intrinsic.
       CI->eraseFromParent();
-    } else if (F->getName().startswith("llvm.atomic.cmp.swap")) {
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-      Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0),
-                                               CI->getArgOperand(1),
-                                               CI->getArgOperand(2),
-                                               Monotonic);
-
-      // Replace intrinsic.
-      Val->takeName(CI);
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Val);
-      CI->eraseFromParent();
-    } else if (F->getName().startswith("llvm.atomic")) {
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      AtomicRMWInst::BinOp Op;
-      if (F->getName().startswith("llvm.atomic.swap"))
-        Op = AtomicRMWInst::Xchg;
-      else if (F->getName().startswith("llvm.atomic.load.add"))
-        Op = AtomicRMWInst::Add;
-      else if (F->getName().startswith("llvm.atomic.load.sub"))
-        Op = AtomicRMWInst::Sub;
-      else if (F->getName().startswith("llvm.atomic.load.and"))
-        Op = AtomicRMWInst::And;
-      else if (F->getName().startswith("llvm.atomic.load.nand"))
-        Op = AtomicRMWInst::Nand;
-      else if (F->getName().startswith("llvm.atomic.load.or"))
-        Op = AtomicRMWInst::Or;
-      else if (F->getName().startswith("llvm.atomic.load.xor"))
-        Op = AtomicRMWInst::Xor;
-      else if (F->getName().startswith("llvm.atomic.load.max"))
-        Op = AtomicRMWInst::Max;
-      else if (F->getName().startswith("llvm.atomic.load.min"))
-        Op = AtomicRMWInst::Min;
-      else if (F->getName().startswith("llvm.atomic.load.umax"))
-        Op = AtomicRMWInst::UMax;
-      else if (F->getName().startswith("llvm.atomic.load.umin"))
-        Op = AtomicRMWInst::UMin;
+      return;
+    } else if (Name.startswith("llvm.x86.xop.vpcom")) {
+      Intrinsic::ID intID;
+      if (Name.endswith("ub"))
+        intID = Intrinsic::x86_xop_vpcomub;
+      else if (Name.endswith("uw"))
+        intID = Intrinsic::x86_xop_vpcomuw;
+      else if (Name.endswith("ud"))
+        intID = Intrinsic::x86_xop_vpcomud;
+      else if (Name.endswith("uq"))
+        intID = Intrinsic::x86_xop_vpcomuq;
+      else if (Name.endswith("b"))
+        intID = Intrinsic::x86_xop_vpcomb;
+      else if (Name.endswith("w"))
+        intID = Intrinsic::x86_xop_vpcomw;
+      else if (Name.endswith("d"))
+        intID = Intrinsic::x86_xop_vpcomd;
+      else if (Name.endswith("q"))
+        intID = Intrinsic::x86_xop_vpcomq;
       else
-        llvm_unreachable("Unknown atomic");
-
-      Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0),
-                                           CI->getArgOperand(1),
-                                           Monotonic);
-
-      // Replace intrinsic.
-      Val->takeName(CI);
-      if (!CI->use_empty())
-        CI->replaceAllUsesWith(Val);
-      CI->eraseFromParent();
-    } else if (F->getName() == "llvm.memory.barrier") {
-      IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
-
-      // Note that this conversion ignores the "device" bit; it was not really
-      // well-defined, and got abused because nobody paid enough attention to
-      // get it right. In practice, this probably doesn't matter; application
-      // code generally doesn't need anything stronger than
-      // SequentiallyConsistent (and realistically, SequentiallyConsistent
-      // is lowered to a strong enough barrier for almost anything).
-
-      if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue())
-        Builder.CreateFence(SequentiallyConsistent);
-      else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue())
-        Builder.CreateFence(Release);
-      else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue())
-        Builder.CreateFence(Acquire);
+        llvm_unreachable("Unknown suffix");
+
+      Name = Name.substr(18); // strip off "llvm.x86.xop.vpcom"
+      unsigned Imm;
+      if (Name.startswith("lt"))
+        Imm = 0;
+      else if (Name.startswith("le"))
+        Imm = 1;
+      else if (Name.startswith("gt"))
+        Imm = 2;
+      else if (Name.startswith("ge"))
+        Imm = 3;
+      else if (Name.startswith("eq"))
+        Imm = 4;
+      else if (Name.startswith("ne"))
+        Imm = 5;
+      else if (Name.startswith("true"))
+        Imm = 6;
+      else if (Name.startswith("false"))
+        Imm = 7;
       else
-        Builder.CreateFence(AcquireRelease);
+        llvm_unreachable("Unknown condition");
 
-      // Remove intrinsic.
-      CI->eraseFromParent();
+      Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID);
+      Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0),
+                                CI->getArgOperand(1), Builder.getInt8(Imm));
     } else {
-      llvm_unreachable("Unknown function for CallInst upgrade.");
+      bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
+      if (Name == "llvm.x86.avx.vpermil.pd.256")
+        PD256 = true;
+      else if (Name == "llvm.x86.avx.vpermil.pd")
+        PD128 = true;
+      else if (Name == "llvm.x86.avx.vpermil.ps.256")
+        PS256 = true;
+      else if (Name == "llvm.x86.avx.vpermil.ps")
+        PS128 = true;
+
+      if (PD256 || PD128 || PS256 || PS128) {
+        Value *Op0 = CI->getArgOperand(0);
+        unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+        SmallVector<Constant*, 8> Idxs;
+
+        if (PD128)
+          for (unsigned i = 0; i != 2; ++i)
+            Idxs.push_back(Builder.getInt32((Imm >> i) & 0x1));
+        else if (PD256)
+          for (unsigned l = 0; l != 4; l+=2)
+            for (unsigned i = 0; i != 2; ++i)
+              Idxs.push_back(Builder.getInt32(((Imm >> (l+i)) & 0x1) + l));
+        else if (PS128)
+          for (unsigned i = 0; i != 4; ++i)
+            Idxs.push_back(Builder.getInt32((Imm >> (2 * i)) & 0x3));
+        else if (PS256)
+          for (unsigned l = 0; l != 8; l+=4)
+            for (unsigned i = 0; i != 4; ++i)
+              Idxs.push_back(Builder.getInt32(((Imm >> (2 * i)) & 0x3) + l));
+        else
+          llvm_unreachable("Unexpected function");
+
+        Rep = Builder.CreateShuffleVector(Op0, Op0, ConstantVector::get(Idxs));
+      } else {
+        llvm_unreachable("Unknown function for CallInst upgrade.");
+      }
     }
-    return;
-  }
 
-  switch (NewFn->getIntrinsicID()) {
-  case Intrinsic::prefetch: {
-    IRBuilder<> Builder(C);
-    Builder.SetInsertPoint(CI->getParent(), CI);
-    llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
-
-    // Add the extra "data cache" argument
-    Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
-                           CI->getArgOperand(2),
-                           llvm::ConstantInt::get(I32Ty, 1) };
-    CallInst *NewCI = CallInst::Create(NewFn, Operands,
-                                       CI->getName(), CI);
-    NewCI->setTailCall(CI->isTailCall());
-    NewCI->setCallingConv(CI->getCallingConv());
-    //  Handle any uses of the old CallInst.
-    if (!CI->use_empty())
-      //  Replace all uses of the old call with the new cast which has the
-      //  correct type.
-      CI->replaceAllUsesWith(NewCI);
-
-    //  Clean up the old call now that it has been completely upgraded.
+    CI->replaceAllUsesWith(Rep);
     CI->eraseFromParent();
-    break;
+    return;
   }
-  case Intrinsic::init_trampoline: {
 
-    //  Transform
-    //    %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z)
-    //  to
-    //    call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z)
-    //    %tramp = call i8* llvm.adjust.trampoline (i8* %x)
+  std::string Name = CI->getName().str();
+  CI->setName(Name + ".old");
 
-    Function *AdjustTrampolineFn =
-      cast<Function>(Intrinsic::getDeclaration(F->getParent(),
-                                               Intrinsic::adjust_trampoline));
+  switch (NewFn->getIntrinsicID()) {
+  default:
+    llvm_unreachable("Unknown function for CallInst upgrade.");
+
+  case Intrinsic::ctlz:
+  case Intrinsic::cttz:
+    assert(CI->getNumArgOperands() == 1 &&
+           "Mismatch between function args and call args");
+    CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+                                               Builder.getFalse(), Name));
+    CI->eraseFromParent();
+    return;
 
-    IRBuilder<> Builder(C);
-    Builder.SetInsertPoint(CI);
+  case Intrinsic::arm_neon_vclz: {
+    // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.*
+    CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+                                               Builder.getFalse(),
+                                               "llvm.ctlz." + Name.substr(14)));
+    CI->eraseFromParent();
+    return;
+  }
+  case Intrinsic::ctpop: {
+    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(0)));
+    CI->eraseFromParent();
+    return;
+  }
 
-    Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
-                        CI->getArgOperand(2));
+  case Intrinsic::x86_xop_vfrcz_ss:
+  case Intrinsic::x86_xop_vfrcz_sd:
+    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
+                                              Name));
+    CI->eraseFromParent();
+    return;
 
-    CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn,
-                                              CI->getArgOperand(0),
-                                              CI->getName());
-    if (!CI->use_empty())
-      CI->replaceAllUsesWith(AdjustCall);
+  case Intrinsic::x86_sse41_ptestc:
+  case Intrinsic::x86_sse41_ptestz:
+  case Intrinsic::x86_sse41_ptestnzc: {
+    // The arguments for these intrinsics used to be v4f32, and changed
+    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
+    // So, the only thing required is a bitcast for both arguments.
+    // First, check the arguments have the old type.
+    Value *Arg0 = CI->getArgOperand(0);
+    if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
+      return;
+
+    // Old intrinsic, add bitcasts
+    Value *Arg1 = CI->getArgOperand(1);
+
+    Value *BC0 =
+      Builder.CreateBitCast(Arg0,
+                            VectorType::get(Type::getInt64Ty(C), 2),
+                            "cast");
+    Value *BC1 =
+      Builder.CreateBitCast(Arg1,
+                            VectorType::get(Type::getInt64Ty(C), 2),
+                            "cast");
+
+    CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
+    CI->replaceAllUsesWith(NewCall);
     CI->eraseFromParent();
-    break;
+    return;
   }
   }
 }
@@ -378,45 +390,3 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
   }
 }
 
-/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
-/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
-/// strips that use.
-void llvm::CheckDebugInfoIntrinsics(Module *M) {
-  if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
-    while (!FuncStart->use_empty())
-      cast<CallInst>(FuncStart->use_back())->eraseFromParent();
-    FuncStart->eraseFromParent();
-  }
-  
-  if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
-    while (!StopPoint->use_empty())
-      cast<CallInst>(StopPoint->use_back())->eraseFromParent();
-    StopPoint->eraseFromParent();
-  }
-
-  if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
-    while (!RegionStart->use_empty())
-      cast<CallInst>(RegionStart->use_back())->eraseFromParent();
-    RegionStart->eraseFromParent();
-  }
-
-  if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
-    while (!RegionEnd->use_empty())
-      cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
-    RegionEnd->eraseFromParent();
-  }
-  
-  if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
-    if (!Declare->use_empty()) {
-      DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
-      if (!isa<MDNode>(DDI->getArgOperand(0)) ||
-          !isa<MDNode>(DDI->getArgOperand(1))) {
-        while (!Declare->use_empty()) {
-          CallInst *CI = cast<CallInst>(Declare->use_back());
-          CI->eraseFromParent();
-        }
-        Declare->eraseFromParent();
-      }
-    }
-  }
-}