X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTransforms%2FInstrumentation%2FDataFlowSanitizer.cpp;h=9b9e725cde81bb8f45d429408ff8f2ef591ba898;hb=6a9b29ec9b42e792732659e510a655449a41b661;hp=af227d27d9208e33113675c3094c7734317842d0;hpb=a90d91fd1add17b3c6af09a845ede940595098e9;p=oota-llvm.git diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index af227d27d92..9b9e725cde8 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -48,6 +48,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IRBuilder.h" @@ -76,22 +77,31 @@ static cl::opt ClPreserveAlignment( cl::desc("respect alignment requirements provided by input IR"), cl::Hidden, cl::init(false)); -// The greylist file controls how shadow parameters are passed. -// The program acts as though every function in the greylist is passed -// parameters with zero shadow and that its return value also has zero shadow. -// This avoids the use of TLS or extra function parameters to pass shadow state -// and essentially makes the function conform to the "native" (i.e. unsanitized) -// ABI. -static cl::opt ClGreylistFile( - "dfsan-greylist", - cl::desc("File containing the list of functions with a native ABI"), +// The ABI list file controls how shadow parameters are passed. The pass treats +// every function labelled "uninstrumented" in the ABI list file as conforming +// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains +// additional annotations for those functions, a call to one of those functions +// will produce a warning message, as the labelling behaviour of the function is +// unknown. The other supported annotations are "functional" and "discard", +// which are described below under DataFlowSanitizer::WrapperKind. +static cl::opt ClABIListFile( + "dfsan-abilist", + cl::desc("File listing native ABI functions and how the pass treats them"), cl::Hidden); +// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented +// functions (see DataFlowSanitizer::InstrumentedABI below). static cl::opt ClArgsABI( "dfsan-args-abi", cl::desc("Use the argument ABI rather than the TLS ABI"), cl::Hidden); +static cl::opt ClDebugNonzeroLabels( + "dfsan-debug-nonzero-labels", + cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, " + "load or return with a nonzero label"), + cl::Hidden); + namespace { class DataFlowSanitizer : public ModulePass { @@ -102,13 +112,42 @@ class DataFlowSanitizer : public ModulePass { ShadowWidth = 16 }; + /// Which ABI should be used for instrumented functions? enum InstrumentedABI { - IA_None, - IA_MemOnly, + /// Argument and return value labels are passed through additional + /// arguments and by modifying the return type. IA_Args, + + /// Argument and return value labels are passed through TLS variables + /// __dfsan_arg_tls and __dfsan_retval_tls. IA_TLS }; + /// How should calls to uninstrumented functions be handled? + enum WrapperKind { + /// This function is present in an uninstrumented form but we don't know + /// how it should be handled. Print a warning and call the function anyway. + /// Don't label the return value. + WK_Warning, + + /// This function does not write to (user-accessible) memory, and its return + /// value is unlabelled. + WK_Discard, + + /// This function does not write to (user-accessible) memory, and the label + /// of its return value is the union of the label of its arguments. + WK_Functional, + + /// Instead of calling the function, a custom wrapper __dfsw_F is called, + /// where F is the name of the function. This function may wrap the + /// original function or provide its own implementation. This is similar to + /// the IA_Args ABI, except that IA_Args uses a struct return type to + /// pass the return value shadow in a register, while WK_Custom uses an + /// extra pointer argument to return the shadow. This allows the wrapped + /// form of the function type to be expressed in C. + WK_Custom + }; + DataLayout *DL; Module *Mod; LLVMContext *Ctx; @@ -126,20 +165,37 @@ class DataFlowSanitizer : public ModulePass { Constant *GetRetvalTLS; FunctionType *DFSanUnionFnTy; FunctionType *DFSanUnionLoadFnTy; + FunctionType *DFSanUnimplementedFnTy; + FunctionType *DFSanSetLabelFnTy; + FunctionType *DFSanNonzeroLabelFnTy; Constant *DFSanUnionFn; Constant *DFSanUnionLoadFn; + Constant *DFSanUnimplementedFn; + Constant *DFSanSetLabelFn; + Constant *DFSanNonzeroLabelFn; MDNode *ColdCallWeights; - OwningPtr Greylist; + OwningPtr ABIList; DenseMap UnwrappedFnMap; + AttributeSet ReadOnlyNoneAttrs; Value *getShadowAddress(Value *Addr, Instruction *Pos); Value *combineShadows(Value *V1, Value *V2, Instruction *Pos); - FunctionType *getInstrumentedFunctionType(FunctionType *T); - InstrumentedABI getInstrumentedABI(Function *F); - InstrumentedABI getDefaultInstrumentedABI(); - -public: - DataFlowSanitizer(void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0); + bool isInstrumented(const Function *F); + bool isInstrumented(const GlobalAlias *GA); + FunctionType *getArgsFunctionType(FunctionType *T); + FunctionType *getTrampolineFunctionType(FunctionType *T); + FunctionType *getCustomFunctionType(FunctionType *T); + InstrumentedABI getInstrumentedABI(); + WrapperKind getWrapperKind(Function *F); + void addGlobalNamePrefix(GlobalValue *GV); + Function *buildWrapperFunction(Function *F, StringRef NewFName, + GlobalValue::LinkageTypes NewFLink, + FunctionType *NewFT); + Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); + + public: + DataFlowSanitizer(StringRef ABIListFile = StringRef(), + void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0); static char ID; bool doInitialization(Module &M); bool runOnModule(Module &M); @@ -149,16 +205,20 @@ struct DFSanFunction { DataFlowSanitizer &DFS; Function *F; DataFlowSanitizer::InstrumentedABI IA; + bool IsNativeABI; Value *ArgTLSPtr; Value *RetvalTLSPtr; + AllocaInst *LabelReturnAlloca; DenseMap ValShadowMap; DenseMap AllocaShadowMap; std::vector > PHIFixups; DenseSet SkipInsts; + DenseSet NonZeroChecks; - DFSanFunction(DataFlowSanitizer &DFS, Function *F) - : DFS(DFS), F(F), IA(DFS.getInstrumentedABI(F)), ArgTLSPtr(0), - RetvalTLSPtr(0) {} + DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) + : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), + IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0), + LabelReturnAlloca(0) {} Value *getArgTLSPtr(); Value *getArgTLS(unsigned Index, Instruction *Pos); Value *getRetvalTLS(); @@ -172,7 +232,7 @@ struct DFSanFunction { }; class DFSanVisitor : public InstVisitor { -public: + public: DFSanFunction &DFSF; DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} @@ -194,6 +254,7 @@ public: void visitInsertValueInst(InsertValueInst &I); void visitAllocaInst(AllocaInst &I); void visitSelectInst(SelectInst &I); + void visitMemSetInst(MemSetInst &I); void visitMemTransferInst(MemTransferInst &I); }; @@ -203,17 +264,21 @@ char DataFlowSanitizer::ID; INITIALIZE_PASS(DataFlowSanitizer, "dfsan", "DataFlowSanitizer: dynamic data flow analysis.", false, false) -ModulePass *llvm::createDataFlowSanitizerPass(void *(*getArgTLS)(), +ModulePass *llvm::createDataFlowSanitizerPass(StringRef ABIListFile, + void *(*getArgTLS)(), void *(*getRetValTLS)()) { - return new DataFlowSanitizer(getArgTLS, getRetValTLS); + return new DataFlowSanitizer(ABIListFile, getArgTLS, getRetValTLS); } -DataFlowSanitizer::DataFlowSanitizer(void *(*getArgTLS)(), +DataFlowSanitizer::DataFlowSanitizer(StringRef ABIListFile, + void *(*getArgTLS)(), void *(*getRetValTLS)()) : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), - Greylist(SpecialCaseList::createOrDie(ClGreylistFile)) {} + ABIList(SpecialCaseList::createOrDie(ABIListFile.empty() ? ClABIListFile + : ABIListFile)) { +} -FunctionType *DataFlowSanitizer::getInstrumentedFunctionType(FunctionType *T) { +FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { llvm::SmallVector ArgTypes; std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) @@ -226,6 +291,41 @@ FunctionType *DataFlowSanitizer::getInstrumentedFunctionType(FunctionType *T) { return FunctionType::get(RetType, ArgTypes, T->isVarArg()); } +FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { + assert(!T->isVarArg()); + llvm::SmallVector ArgTypes; + ArgTypes.push_back(T->getPointerTo()); + std::copy(T->param_begin(), T->param_end(), std::back_inserter(ArgTypes)); + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + ArgTypes.push_back(ShadowPtrTy); + return FunctionType::get(T->getReturnType(), ArgTypes, false); +} + +FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { + assert(!T->isVarArg()); + llvm::SmallVector ArgTypes; + for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end(); + i != e; ++i) { + FunctionType *FT; + if (isa(*i) && (FT = dyn_cast(cast( + *i)->getElementType()))) { + ArgTypes.push_back(getTrampolineFunctionType(FT)->getPointerTo()); + ArgTypes.push_back(Type::getInt8PtrTy(*Ctx)); + } else { + ArgTypes.push_back(*i); + } + } + for (unsigned i = 0, e = T->getNumParams(); i != e; ++i) + ArgTypes.push_back(ShadowTy); + Type *RetType = T->getReturnType(); + if (!RetType->isVoidTy()) + ArgTypes.push_back(ShadowPtrTy); + return FunctionType::get(T->getReturnType(), ArgTypes, false); +} + bool DataFlowSanitizer::doInitialization(Module &M) { DL = getAnalysisIfAvailable(); if (!DL) @@ -246,6 +346,13 @@ bool DataFlowSanitizer::doInitialization(Module &M) { Type *DFSanUnionLoadArgs[2] = { ShadowPtrTy, IntptrTy }; DFSanUnionLoadFnTy = FunctionType::get(ShadowTy, DFSanUnionLoadArgs, /*isVarArg=*/ false); + DFSanUnimplementedFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + Type *DFSanSetLabelArgs[3] = { ShadowTy, Type::getInt8PtrTy(*Ctx), IntptrTy }; + DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx), + DFSanSetLabelArgs, /*isVarArg=*/false); + DFSanNonzeroLabelFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), ArrayRef(), /*isVarArg=*/false); if (GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); @@ -267,23 +374,115 @@ bool DataFlowSanitizer::doInitialization(Module &M) { return true; } -DataFlowSanitizer::InstrumentedABI -DataFlowSanitizer::getInstrumentedABI(Function *F) { - if (Greylist->isIn(*F)) - return IA_MemOnly; - else - return getDefaultInstrumentedABI(); +bool DataFlowSanitizer::isInstrumented(const Function *F) { + return !ABIList->isIn(*F, "uninstrumented"); +} + +bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { + return !ABIList->isIn(*GA, "uninstrumented"); } -DataFlowSanitizer::InstrumentedABI -DataFlowSanitizer::getDefaultInstrumentedABI() { +DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { return ClArgsABI ? IA_Args : IA_TLS; } +DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { + if (ABIList->isIn(*F, "functional")) + return WK_Functional; + if (ABIList->isIn(*F, "discard")) + return WK_Discard; + if (ABIList->isIn(*F, "custom")) + return WK_Custom; + + return WK_Warning; +} + +void DataFlowSanitizer::addGlobalNamePrefix(GlobalValue *GV) { + std::string GVName = GV->getName(), Prefix = "dfs$"; + GV->setName(Prefix + GVName); + + // Try to change the name of the function in module inline asm. We only do + // this for specific asm directives, currently only ".symver", to try to avoid + // corrupting asm which happens to contain the symbol name as a substring. + // Note that the substitution for .symver assumes that the versioned symbol + // also has an instrumented name. + std::string Asm = GV->getParent()->getModuleInlineAsm(); + std::string SearchStr = ".symver " + GVName + ","; + size_t Pos = Asm.find(SearchStr); + if (Pos != std::string::npos) { + Asm.replace(Pos, SearchStr.size(), + ".symver " + Prefix + GVName + "," + Prefix); + GV->getParent()->setModuleInlineAsm(Asm); + } +} + +Function * +DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, + GlobalValue::LinkageTypes NewFLink, + FunctionType *NewFT) { + FunctionType *FT = F->getFunctionType(); + Function *NewF = Function::Create(NewFT, NewFLink, NewFName, + F->getParent()); + NewF->copyAttributesFrom(F); + NewF->removeAttributes( + AttributeSet::ReturnIndex, + AttributeFuncs::typeIncompatible(NewFT->getReturnType(), + AttributeSet::ReturnIndex)); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); + std::vector Args; + unsigned n = FT->getNumParams(); + for (Function::arg_iterator ai = NewF->arg_begin(); n != 0; ++ai, --n) + Args.push_back(&*ai); + CallInst *CI = CallInst::Create(F, Args, "", BB); + if (FT->getReturnType()->isVoidTy()) + ReturnInst::Create(*Ctx, BB); + else + ReturnInst::Create(*Ctx, CI, BB); + + return NewF; +} + +Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, + StringRef FName) { + FunctionType *FTT = getTrampolineFunctionType(FT); + Constant *C = Mod->getOrInsertFunction(FName, FTT); + Function *F = dyn_cast(C); + if (F && F->isDeclaration()) { + F->setLinkage(GlobalValue::LinkOnceODRLinkage); + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + std::vector Args; + Function::arg_iterator AI = F->arg_begin(); ++AI; + for (unsigned N = FT->getNumParams(); N != 0; ++AI, --N) + Args.push_back(&*AI); + CallInst *CI = + CallInst::Create(&F->getArgumentList().front(), Args, "", BB); + ReturnInst *RI; + if (FT->getReturnType()->isVoidTy()) + RI = ReturnInst::Create(*Ctx, BB); + else + RI = ReturnInst::Create(*Ctx, CI, BB); + + DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); + Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; + for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) + DFSF.ValShadowMap[ValAI] = ShadowAI; + DFSanVisitor(DFSF).visitCallInst(*CI); + if (!FT->getReturnType()->isVoidTy()) + new StoreInst(DFSF.getShadow(RI->getReturnValue()), + &F->getArgumentList().back(), RI); + } + + return C; +} + bool DataFlowSanitizer::runOnModule(Module &M) { if (!DL) return false; + if (ABIList->isIn(M, "skip")) + return false; + if (!GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); @@ -308,33 +507,80 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (Function *F = dyn_cast(DFSanUnionLoadFn)) { F->addAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); } + DFSanUnimplementedFn = + Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy); + DFSanSetLabelFn = + Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy); + if (Function *F = dyn_cast(DFSanSetLabelFn)) { + F->addAttribute(1, Attribute::ZExt); + } + DFSanNonzeroLabelFn = + Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy); std::vector FnsToInstrument; + llvm::SmallPtrSet FnsWithNativeABI; for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) { - if (!i->isIntrinsic() && i != DFSanUnionFn && i != DFSanUnionLoadFn) + if (!i->isIntrinsic() && + i != DFSanUnionFn && + i != DFSanUnionLoadFn && + i != DFSanUnimplementedFn && + i != DFSanSetLabelFn && + i != DFSanNonzeroLabelFn) FnsToInstrument.push_back(&*i); } - // First, change the ABI of every function in the module. Greylisted + // Give function aliases prefixes when necessary, and build wrappers where the + // instrumentedness is inconsistent. + for (Module::alias_iterator i = M.alias_begin(), e = M.alias_end(); i != e;) { + GlobalAlias *GA = &*i; + ++i; + // Don't stop on weak. We assume people aren't playing games with the + // instrumentedness of overridden weak aliases. + if (Function *F = dyn_cast( + GA->resolveAliasedGlobal(/*stopOnWeak=*/false))) { + bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); + if (GAInst && FInst) { + addGlobalNamePrefix(GA); + } else if (GAInst != FInst) { + // Non-instrumented alias of an instrumented function, or vice versa. + // Replace the alias with a native-ABI wrapper of the aliasee. The pass + // below will take care of instrumenting it. + Function *NewF = + buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); + GA->replaceAllUsesWith(NewF); + NewF->takeName(GA); + GA->eraseFromParent(); + FnsToInstrument.push_back(NewF); + } + } + } + + AttrBuilder B; + B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + ReadOnlyNoneAttrs = AttributeSet::get(*Ctx, AttributeSet::FunctionIndex, B); + + // First, change the ABI of every function in the module. ABI-listed // functions keep their original ABI and get a wrapper function. for (std::vector::iterator i = FnsToInstrument.begin(), e = FnsToInstrument.end(); i != e; ++i) { Function &F = **i; - FunctionType *FT = F.getFunctionType(); - FunctionType *NewFT = getInstrumentedFunctionType(FT); - // If the function types are the same (i.e. void()), we don't need to do - // anything here. - if (FT != NewFT) { - switch (getInstrumentedABI(&F)) { - case IA_Args: { + + bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() && + FT->getReturnType()->isVoidTy()); + + if (isInstrumented(&F)) { + // Instrumented functions get a 'dfs$' prefix. This allows us to more + // easily identify cases of mismatching ABIs. + if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) { + FunctionType *NewFT = getArgsFunctionType(FT); Function *NewF = Function::Create(NewFT, F.getLinkage(), "", &M); - NewF->setCallingConv(F.getCallingConv()); - NewF->setAttributes(F.getAttributes().removeAttributes( - *Ctx, AttributeSet::ReturnIndex, + NewF->copyAttributesFrom(&F); + NewF->removeAttributes( + AttributeSet::ReturnIndex, AttributeFuncs::typeIncompatible(NewFT->getReturnType(), - AttributeSet::ReturnIndex))); + AttributeSet::ReturnIndex)); for (Function::arg_iterator FArg = F.arg_begin(), NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); @@ -358,41 +604,49 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewF->takeName(&F); F.eraseFromParent(); *i = NewF; - break; + addGlobalNamePrefix(NewF); + } else { + addGlobalNamePrefix(&F); } - case IA_MemOnly: { - assert(!FT->isVarArg() && "varargs not handled here yet"); - assert(getDefaultInstrumentedABI() == IA_Args); - Function *NewF = - Function::Create(NewFT, GlobalValue::LinkOnceODRLinkage, - std::string("dfsw$") + F.getName(), &M); - NewF->setCallingConv(F.getCallingConv()); - NewF->setAttributes(F.getAttributes()); - - BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); - std::vector Args; - unsigned n = FT->getNumParams(); - for (Function::arg_iterator i = NewF->arg_begin(); n != 0; ++i, --n) - Args.push_back(&*i); - CallInst *CI = CallInst::Create(&F, Args, "", BB); - if (FT->getReturnType()->isVoidTy()) - ReturnInst::Create(*Ctx, BB); - else { - Value *InsVal = InsertValueInst::Create( - UndefValue::get(NewFT->getReturnType()), CI, 0, "", BB); - Value *InsShadow = - InsertValueInst::Create(InsVal, ZeroShadow, 1, "", BB); - ReturnInst::Create(*Ctx, InsShadow, BB); - } - - Value *WrappedFnCst = - ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); - F.replaceAllUsesWith(WrappedFnCst); - UnwrappedFnMap[WrappedFnCst] = &F; - break; - } - default: - break; + // Hopefully, nobody will try to indirectly call a vararg + // function... yet. + } else if (FT->isVarArg()) { + UnwrappedFnMap[&F] = &F; + *i = 0; + } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { + // Build a wrapper function for F. The wrapper simply calls F, and is + // added to FnsToInstrument so that any instrumentation according to its + // WrapperKind is done in the second pass below. + FunctionType *NewFT = getInstrumentedABI() == IA_Args + ? getArgsFunctionType(FT) + : FT; + Function *NewF = buildWrapperFunction( + &F, std::string("dfsw$") + std::string(F.getName()), + GlobalValue::LinkOnceODRLinkage, NewFT); + if (getInstrumentedABI() == IA_TLS) + NewF->removeAttributes(AttributeSet::FunctionIndex, ReadOnlyNoneAttrs); + + Value *WrappedFnCst = + ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); + F.replaceAllUsesWith(WrappedFnCst); + UnwrappedFnMap[WrappedFnCst] = &F; + *i = NewF; + + if (!F.isDeclaration()) { + // This function is probably defining an interposition of an + // uninstrumented function and hence needs to keep the original ABI. + // But any functions it may call need to use the instrumented ABI, so + // we instrument it in a mode which preserves the original ABI. + FnsWithNativeABI.insert(&F); + + // This code needs to rebuild the iterators, as they may be invalidated + // by the push_back, taking care that the new range does not include + // any functions added by this code. + size_t N = i - FnsToInstrument.begin(), + Count = e - FnsToInstrument.begin(); + FnsToInstrument.push_back(&F); + i = FnsToInstrument.begin() + N; + e = FnsToInstrument.begin() + Count; } } } @@ -400,12 +654,12 @@ bool DataFlowSanitizer::runOnModule(Module &M) { for (std::vector::iterator i = FnsToInstrument.begin(), e = FnsToInstrument.end(); i != e; ++i) { - if ((*i)->isDeclaration()) + if (!*i || (*i)->isDeclaration()) continue; removeUnreachableBlocks(**i); - DFSanFunction DFSF(*this, *i); + DFSanFunction DFSF(*this, *i, FnsWithNativeABI.count(*i)); // DFSanVisitor may create new basic blocks, which confuses df_iterator. // Build a copy of the list before iterating over it. @@ -433,6 +687,10 @@ bool DataFlowSanitizer::runOnModule(Module &M) { } } + // We will not necessarily be able to compute the shadow for every phi node + // until we have visited every block. Therefore, the code that handles phi + // nodes adds them to the PHIFixups list so that they can be properly + // handled here. for (std::vector >::iterator i = DFSF.PHIFixups.begin(), e = DFSF.PHIFixups.end(); @@ -443,6 +701,31 @@ bool DataFlowSanitizer::runOnModule(Module &M) { val, DFSF.getShadow(i->first->getIncomingValue(val))); } } + + // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy + // places (i.e. instructions in basic blocks we haven't even begun visiting + // yet). To make our life easier, do this work in a pass after the main + // instrumentation. + if (ClDebugNonzeroLabels) { + for (DenseSet::iterator i = DFSF.NonZeroChecks.begin(), + e = DFSF.NonZeroChecks.end(); + i != e; ++i) { + Instruction *Pos; + if (Instruction *I = dyn_cast(*i)) + Pos = I->getNextNode(); + else + Pos = DFSF.F->getEntryBlock().begin(); + while (isa(Pos) || isa(Pos)) + Pos = Pos->getNextNode(); + IRBuilder<> IRB(Pos); + Instruction *NeInst = cast( + IRB.CreateICmpNE(*i, DFSF.DFS.ZeroShadow)); + BranchInst *BI = cast(SplitBlockAndInsertIfThen( + NeInst, /*Unreachable=*/ false, ColdCallWeights)); + IRBuilder<> ThenIRB(BI); + ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn); + } + } } return false; @@ -479,6 +762,8 @@ Value *DFSanFunction::getShadow(Value *V) { Value *&Shadow = ValShadowMap[V]; if (!Shadow) { if (Argument *A = dyn_cast(V)) { + if (IsNativeABI) + return DFS.ZeroShadow; switch (IA) { case DataFlowSanitizer::IA_TLS: { Value *ArgTLSPtr = getArgTLSPtr(); @@ -495,12 +780,11 @@ Value *DFSanFunction::getShadow(Value *V) { while (ArgIdx--) ++i; Shadow = i; + assert(Shadow->getType() == DFS.ShadowTy); break; } - default: - Shadow = DFS.ZeroShadow; - break; } + NonZeroChecks.insert(Shadow); } else { Shadow = DFS.ZeroShadow; } @@ -550,7 +834,7 @@ Value *DataFlowSanitizer::combineShadows(Value *V1, Value *V2, BasicBlock *Tail = BI->getSuccessor(0); PHINode *Phi = PHINode::Create(ShadowTy, 2, "", Tail->begin()); Phi->addIncoming(Call, Call->getParent()); - Phi->addIncoming(ZeroShadow, Head); + Phi->addIncoming(V1, Head); Pos = Phi; return Phi; } else { @@ -697,7 +981,11 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { Value *LoadedShadow = DFSF.loadShadow(LI.getPointerOperand(), Size, Align, &LI); Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand()); - DFSF.setShadow(&LI, DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI)); + Value *CombinedShadow = DFSF.DFS.combineShadows(LoadedShadow, PtrShadow, &LI); + if (CombinedShadow != DFSF.DFS.ZeroShadow) + DFSF.NonZeroChecks.insert(CombinedShadow); + + DFSF.setShadow(&LI, CombinedShadow); } void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, @@ -842,6 +1130,15 @@ void DFSanVisitor::visitSelectInst(SelectInst &I) { } } +void DFSanVisitor::visitMemSetInst(MemSetInst &I) { + IRBuilder<> IRB(&I); + Value *ValShadow = DFSF.getShadow(I.getValue()); + IRB.CreateCall3( + DFSF.DFS.DFSanSetLabelFn, ValShadow, + IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)), + IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)); +} + void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { IRBuilder<> IRB(&I); Value *DestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I); @@ -866,7 +1163,7 @@ void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) { } void DFSanVisitor::visitReturnInst(ReturnInst &RI) { - if (RI.getReturnValue()) { + if (!DFSF.IsNativeABI && RI.getReturnValue()) { switch (DFSF.IA) { case DataFlowSanitizer::IA_TLS: { Value *S = DFSF.getShadow(RI.getReturnValue()); @@ -884,8 +1181,6 @@ void DFSanVisitor::visitReturnInst(ReturnInst &RI) { RI.setOperand(0, InsShadow); break; } - default: - break; } } } @@ -897,19 +1192,107 @@ void DFSanVisitor::visitCallSite(CallSite CS) { return; } + IRBuilder<> IRB(CS.getInstruction()); + DenseMap::iterator i = DFSF.DFS.UnwrappedFnMap.find(CS.getCalledValue()); if (i != DFSF.DFS.UnwrappedFnMap.end()) { - CS.setCalledFunction(i->second); - DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); - return; - } + Function *F = i->second; + switch (DFSF.DFS.getWrapperKind(F)) { + case DataFlowSanitizer::WK_Warning: { + CS.setCalledFunction(F); + IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, + IRB.CreateGlobalStringPtr(F->getName())); + DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + return; + } + case DataFlowSanitizer::WK_Discard: { + CS.setCalledFunction(F); + DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); + return; + } + case DataFlowSanitizer::WK_Functional: { + CS.setCalledFunction(F); + visitOperandShadowInst(*CS.getInstruction()); + return; + } + case DataFlowSanitizer::WK_Custom: { + // Don't try to handle invokes of custom functions, it's too complicated. + // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_ + // wrapper. + if (CallInst *CI = dyn_cast(CS.getInstruction())) { + FunctionType *FT = F->getFunctionType(); + FunctionType *CustomFT = DFSF.DFS.getCustomFunctionType(FT); + std::string CustomFName = "__dfsw_"; + CustomFName += F->getName(); + Constant *CustomF = + DFSF.DFS.Mod->getOrInsertFunction(CustomFName, CustomFT); + if (Function *CustomFn = dyn_cast(CustomF)) { + CustomFn->copyAttributesFrom(F); + + // Custom functions returning non-void will write to the return label. + if (!FT->getReturnType()->isVoidTy()) { + CustomFn->removeAttributes(AttributeSet::FunctionIndex, + DFSF.DFS.ReadOnlyNoneAttrs); + } + } - IRBuilder<> IRB(CS.getInstruction()); + std::vector Args; + + CallSite::arg_iterator i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) { + Type *T = (*i)->getType(); + FunctionType *ParamFT; + if (isa(T) && + (ParamFT = dyn_cast( + cast(T)->getElementType()))) { + std::string TName = "dfst"; + TName += utostr(FT->getNumParams() - n); + TName += "$"; + TName += F->getName(); + Constant *T = DFSF.DFS.getOrBuildTrampolineFunction(ParamFT, TName); + Args.push_back(T); + Args.push_back( + IRB.CreateBitCast(*i, Type::getInt8PtrTy(*DFSF.DFS.Ctx))); + } else { + Args.push_back(*i); + } + } + + i = CS.arg_begin(); + for (unsigned n = FT->getNumParams(); n != 0; ++i, --n) + Args.push_back(DFSF.getShadow(*i)); + + if (!FT->getReturnType()->isVoidTy()) { + if (!DFSF.LabelReturnAlloca) { + DFSF.LabelReturnAlloca = + new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn", + DFSF.F->getEntryBlock().begin()); + } + Args.push_back(DFSF.LabelReturnAlloca); + } + + CallInst *CustomCI = IRB.CreateCall(CustomF, Args); + CustomCI->setCallingConv(CI->getCallingConv()); + CustomCI->setAttributes(CI->getAttributes()); + + if (!FT->getReturnType()->isVoidTy()) { + LoadInst *LabelLoad = IRB.CreateLoad(DFSF.LabelReturnAlloca); + DFSF.setShadow(CustomCI, LabelLoad); + } + + CI->replaceAllUsesWith(CustomCI); + CI->eraseFromParent(); + return; + } + break; + } + } + } FunctionType *FT = cast( CS.getCalledValue()->getType()->getPointerElementType()); - if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { for (unsigned i = 0, n = FT->getNumParams(); i != n; ++i) { IRB.CreateStore(DFSF.getShadow(CS.getArgument(i)), DFSF.getArgTLS(i, CS.getInstruction())); @@ -930,18 +1313,19 @@ void DFSanVisitor::visitCallSite(CallSite CS) { Next = CS->getNextNode(); } - if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_TLS) { + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) { IRBuilder<> NextIRB(Next); LoadInst *LI = NextIRB.CreateLoad(DFSF.getRetvalTLS()); DFSF.SkipInsts.insert(LI); DFSF.setShadow(CS.getInstruction(), LI); + DFSF.NonZeroChecks.insert(LI); } } // Do all instrumentation for IA_Args down here to defer tampering with the // CFG in a way that SplitEdge may be able to detect. - if (DFSF.DFS.getDefaultInstrumentedABI() == DataFlowSanitizer::IA_Args) { - FunctionType *NewFT = DFSF.DFS.getInstrumentedFunctionType(FT); + if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) { + FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT); Value *Func = IRB.CreateBitCast(CS.getCalledValue(), PointerType::getUnqual(NewFT)); std::vector Args; @@ -988,6 +1372,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { ExtractValueInst::Create(NewCS.getInstruction(), 1, "", Next); DFSF.SkipInsts.insert(ExShadow); DFSF.setShadow(ExVal, ExShadow); + DFSF.NonZeroChecks.insert(ExShadow); CS.getInstruction()->replaceAllUsesWith(ExVal); }