//
// In LLVM IR, memory does not have types, so LLVM's own type system is not
// suitable for doing TBAA. Instead, metadata is added to the IR to describe
-// a type system of a higher level language.
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
//
-// This pass is language-independent. The type system is encoded in
-// metadata. This allows this pass to support typical C and C++ TBAA, but
-// it can also support custom aliasing behavior for other languages.
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+// !0 = metadata !{ metadata !"an example type tree" }
+// !1 = metadata !{ metadata !"int", metadata !0 }
+// !2 = metadata !{ metadata !"float", metadata !0 }
+// !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
//
-// This is a work-in-progress. It doesn't work yet, and the metadata
-// format isn't stable.
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
//
-// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to
-// be extended.
-// TODO: struct fields
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which if equal to 1
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+// struct X {
+// double d;
+// int i;
+// };
+// void foo(struct X *x, struct X *y, double *p) {
+// *x = *y;
+// *p = 0.0;
+// }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Passes.h"
-#include "llvm/Module.h"
-#include "llvm/Metadata.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
+static cl::opt<bool> EnableStructPathTBAA("struct-path-tbaa", cl::init(false));
+
namespace {
/// TBAANode - This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
/// getNode - Get the MDNode for this TBAANode.
const MDNode *getNode() const { return Node; }
- /// getParent - Get this TBAANode's Alias DAG parent.
+ /// getParent - Get this TBAANode's Alias tree parent.
TBAANode getParent() const {
if (Node->getNumOperands() < 2)
return TBAANode();
ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
if (!CI)
return false;
- // TODO: Think about the encoding.
- return CI->isOne();
+ return CI->getValue()[0];
+ }
+ };
+
+ /// This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAAStructTagNode {
+ /// This node should be created with createTBAAStructTagNode.
+ const MDNode *Node;
+
+ public:
+ TBAAStructTagNode() : Node(0) {}
+ explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTagNode.
+ const MDNode *getNode() const { return Node; }
+
+ const MDNode *getBaseType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(0));
+ }
+ const MDNode *getAccessType() const {
+ return dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ }
+ uint64_t getOffset() const {
+ return cast<ConstantInt>(Node->getOperand(2))->getZExtValue();
+ }
+ /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
+ /// objects which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 4)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3));
+ if (!CI)
+ return false;
+ return CI->getValue()[0];
+ }
+ };
+
+ /// This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAAStructTypeNode {
+ /// This node should be created with createTBAAStructTypeNode.
+ const MDNode *Node;
+
+ public:
+ TBAAStructTypeNode() : Node(0) {}
+ explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
+
+ /// Get the MDNode for this TBAAStructTypeNode.
+ const MDNode *getNode() const { return Node; }
+
+ /// Get this TBAAStructTypeNode's field in the type DAG with
+ /// given offset. Update the offset to be relative to the field type.
+ TBAAStructTypeNode getParent(uint64_t &Offset) const {
+ // Parent can be omitted for the root node.
+ if (Node->getNumOperands() < 2)
+ return TBAAStructTypeNode();
+
+ // Special handling for a scalar type node.
+ if (Node->getNumOperands() <= 3) {
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
+ }
+
+ // Assume the offsets are in order. We return the previous field if
+ // the current offset is bigger than the given offset.
+ unsigned TheIdx = 0;
+ for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
+ uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))->
+ getZExtValue();
+ if (Cur > Offset) {
+ assert(Idx >= 3 &&
+ "TBAAStructTypeNode::getParent should have an offset match!");
+ TheIdx = Idx - 2;
+ break;
+ }
+ }
+ // Move along the last field.
+ if (TheIdx == 0)
+ TheIdx = Node->getNumOperands() - 2;
+ uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))->
+ getZExtValue();
+ Offset -= Cur;
+ MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
+ if (!P)
+ return TBAAStructTypeNode();
+ return TBAAStructTypeNode(P);
}
};
}
public AliasAnalysis {
public:
static char ID; // Class identification, replacement for typeinfo
- TypeBasedAliasAnalysis() : ImmutablePass(ID) {}
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+ initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
virtual void initializePass() {
InitializeAliasAnalysis(this);
return this;
}
+ bool Aliases(const MDNode *A, const MDNode *B) const;
+ bool PathAliases(const MDNode *A, const MDNode *B) const;
+
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual AliasResult alias(const Location &LocA, const Location &LocB);
- virtual bool pointsToConstantMemory(const Location &Loc);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
};
} // End of anonymous namespace
AliasAnalysis::getAnalysisUsage(AU);
}
-AliasAnalysis::AliasResult
-TypeBasedAliasAnalysis::alias(const Location &LocA,
- const Location &LocB) {
- // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
- // be conservative.
- const MDNode *AM = LocA.TBAATag;
- if (!AM) return AliasAnalysis::alias(LocA, LocB);
- const MDNode *BM = LocB.TBAATag;
- if (!BM) return AliasAnalysis::alias(LocA, LocB);
+/// Aliases - Test whether the type represented by A may alias the
+/// type represented by B.
+bool
+TypeBasedAliasAnalysis::Aliases(const MDNode *A,
+ const MDNode *B) const {
+ if (EnableStructPathTBAA)
+ return PathAliases(A, B);
// Keep track of the root node for A and B.
TBAANode RootA, RootB;
- // Climb the DAG from A to see if we reach B.
- for (TBAANode T(AM); ; ) {
- if (T.getNode() == BM)
+ // Climb the tree from A to see if we reach B.
+ for (TBAANode T(A); ; ) {
+ if (T.getNode() == B)
// B is an ancestor of A.
- return AliasAnalysis::alias(LocA, LocB);
+ return true;
RootA = T;
T = T.getParent();
break;
}
- // Climb the DAG from B to see if we reach A.
- for (TBAANode T(BM); ; ) {
- if (T.getNode() == AM)
+ // Climb the tree from B to see if we reach A.
+ for (TBAANode T(B); ; ) {
+ if (T.getNode() == A)
// A is an ancestor of B.
- return AliasAnalysis::alias(LocA, LocB);
+ return true;
RootB = T;
T = T.getParent();
// Neither node is an ancestor of the other.
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
// If they have the same root, then we've proved there's no alias.
- if (RootA.getNode() == RootB.getNode())
- return NoAlias;
+ return false;
+}
+
+/// Test whether the struct-path tag represented by A may alias the
+/// struct-path tag represented by B.
+bool
+TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
+ const MDNode *B) const {
+ // Keep track of the root node for A and B.
+ TBAAStructTypeNode RootA, RootB;
+ TBAAStructTagNode TagA(A), TagB(B);
+
+ // TODO: We need to check if AccessType of TagA encloses AccessType of
+ // TagB to support aggregate AccessType. If yes, return true.
+
+ // Start from the base type of A, follow the edge with the correct offset in
+ // the type DAG and adjust the offset until we reach the base type of B or
+ // until we reach the Root node.
+ // Compare the adjusted offset once we have the same base.
+
+ // Climb the type DAG from base type of A to see if we reach base type of B.
+ const MDNode *BaseA = TagA.getBaseType();
+ const MDNode *BaseB = TagB.getBaseType();
+ uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
+ for (TBAAStructTypeNode T(BaseA); ; ) {
+ if (T.getNode() == BaseB)
+ // Base type of A encloses base type of B, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootA = T;
+ // Follow the edge with the correct offset, OffsetA will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetA);
+ if (!T.getNode())
+ break;
+ }
+
+ // Reset OffsetA and climb the type DAG from base type of B to see if we reach
+ // base type of A.
+ OffsetA = TagA.getOffset();
+ for (TBAAStructTypeNode T(BaseB); ; ) {
+ if (T.getNode() == BaseA)
+ // Base type of B encloses base type of A, check if the offsets match.
+ return OffsetA == OffsetB;
+
+ RootB = T;
+ // Follow the edge with the correct offset, OffsetB will be adjusted to
+ // be relative to the field type.
+ T = T.getParent(OffsetB);
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
// If they have different roots, they're part of different potentially
// unrelated type systems, so we must be conservative.
- return AliasAnalysis::alias(LocA, LocB);
+ if (RootA.getNode() != RootB.getNode())
+ return true;
+
+ // If they have the same root, then we've proved there's no alias.
+ return false;
+}
+
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Location &LocA,
+ const Location &LocB) {
+ if (!EnableTBAA)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ const MDNode *AM = LocA.TBAATag;
+ if (!AM) return AliasAnalysis::alias(LocA, LocB);
+ const MDNode *BM = LocB.TBAATag;
+ if (!BM) return AliasAnalysis::alias(LocA, LocB);
+
+ // If they may alias, chain to the next AliasAnalysis.
+ if (Aliases(AM, BM))
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // Otherwise return a definitive result.
+ return NoAlias;
}
-bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc) {
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableTBAA)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
const MDNode *M = Loc.TBAATag;
- if (!M) return false;
+ if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- return TBAANode(M).TypeIsImmutable();
+ if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+ (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ return true;
+
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefBehavior(CS);
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If this is an "immutable" type, we can assume the call doesn't write
+ // to memory.
+ if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) ||
+ (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable()))
+ Min = OnlyReadsMemory;
+
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) {
+ // Functions don't have metadata. Just chain to the next implementation.
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ if (const MDNode *L = Loc.TBAATag)
+ if (const MDNode *M =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(L, M))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ if (!EnableTBAA)
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+ if (const MDNode *M1 =
+ CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (const MDNode *M2 =
+ CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+ if (!Aliases(M1, M2))
+ return NoModRef;
+
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
+MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
+ if (!A || !B)
+ return NULL;
+
+ if (A == B)
+ return A;
+
+ // For struct-path aware TBAA, we use the access type of the tag.
+ if (EnableStructPathTBAA) {
+ A = cast_or_null<MDNode>(A->getOperand(1));
+ if (!A) return 0;
+ B = cast_or_null<MDNode>(B->getOperand(1));
+ if (!B) return 0;
+ }
+
+ SmallVector<MDNode *, 4> PathA;
+ MDNode *T = A;
+ while (T) {
+ PathA.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ SmallVector<MDNode *, 4> PathB;
+ T = B;
+ while (T) {
+ PathB.push_back(T);
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ }
+
+ int IA = PathA.size() - 1;
+ int IB = PathB.size() - 1;
+
+ MDNode *Ret = 0;
+ while (IA >= 0 && IB >=0) {
+ if (PathA[IA] == PathB[IB])
+ Ret = PathA[IA];
+ else
+ break;
+ --IA;
+ --IB;
+ }
+ if (!EnableStructPathTBAA)
+ return Ret;
+
+ if (!Ret)
+ return 0;
+ // We need to convert from a type node to a tag node.
+ Type *Int64 = IntegerType::get(A->getContext(), 64);
+ Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
+ return MDNode::get(A->getContext(), Ops);
}