Add support for the new va_arg instruction

[oota-llvm.git] / lib / Transforms / IPO / OldPoolAllocate.cpp
diff --git a/lib/Transforms/IPO/OldPoolAllocate.cpp b/lib/Transforms/IPO/OldPoolAllocate.cpp

index 5dcf825070428a158908e49d1d4c68a35b55603e..bf86403d86b7256b00deba0f6808aa62cb542265 100644 (file)
--- a/lib/Transforms/IPO/OldPoolAllocate.cpp
+++ b/lib/Transforms/IPO/OldPoolAllocate.cpp
@@ -6,39 +6,124 @@
  //
  //===----------------------------------------------------------------------===//
  
-#include "llvm/Transforms/IPO/PoolAllocate.h"
-#include "llvm/Transforms/CloneFunction.h"
+#if 0
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
  #include "llvm/Analysis/DataStructure.h"
-#include "llvm/Analysis/DataStructureGraph.h"
-#include "llvm/Pass.h"
  #include "llvm/Module.h"
-#include "llvm/Function.h"
  #include "llvm/iMemory.h"
  #include "llvm/iTerminators.h"
+#include "llvm/iPHINode.h"
  #include "llvm/iOther.h"
-#include "llvm/ConstantVals.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
  #include "llvm/Target/TargetData.h"
  #include "llvm/Support/InstVisitor.h"
  #include "Support/DepthFirstIterator.h"
  #include "Support/STLExtras.h"
  #include <algorithm>
+using std::vector;
+using std::cerr;
+using std::map;
+using std::string;
+using std::set;
+
+// DEBUG_CREATE_POOLS - Enable this to turn on debug output for the pool
+// creation phase in the top level function of a transformed data structure.
+//
+//#define DEBUG_CREATE_POOLS 1
+
+// DEBUG_TRANSFORM_PROGRESS - Enable this to get lots of debug output on what
+// the transformation is doing.
+//
+//#define DEBUG_TRANSFORM_PROGRESS 1
+
+// DEBUG_POOLBASE_LOAD_ELIMINATOR - Turn this on to get statistics about how
+// many static loads were eliminated from a function...
+//
+#define DEBUG_POOLBASE_LOAD_ELIMINATOR 1
+
+#include "Support/CommandLine.h"
+enum PtrSize {
+  Ptr8bits, Ptr16bits, Ptr32bits
+};
  
+static cl::opt<PtrSize>
+ReqPointerSize("poolalloc-ptr-size",
+               cl::desc("Set pointer size for -poolalloc pass"),
+               cl::values(
+  clEnumValN(Ptr32bits, "32", "Use 32 bit indices for pointers"),
+  clEnumValN(Ptr16bits, "16", "Use 16 bit indices for pointers"),
+  clEnumValN(Ptr8bits ,  "8", "Use 8 bit indices for pointers"),
+                          0));
+
+static cl::opt<bool>
+DisableRLE("no-pool-load-elim",  cl::Hidden,
+           cl::desc("Disable pool load elimination after poolalloc pass"));
+
+const Type *POINTERTYPE;
  
  // FIXME: This is dependant on the sparc backend layout conventions!!
  static TargetData TargetData("test");
  
+static const Type *getPointerTransformedType(const Type *Ty) {
+  if (const PointerType *PT = dyn_cast<PointerType>(Ty)) {
+    return POINTERTYPE;
+  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    vector<const Type *> NewElTypes;
+    NewElTypes.reserve(STy->getElementTypes().size());
+    for (StructType::ElementTypes::const_iterator
+           I = STy->getElementTypes().begin(),
+           E = STy->getElementTypes().end(); I != E; ++I)
+      NewElTypes.push_back(getPointerTransformedType(*I));
+    return StructType::get(NewElTypes);
+  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    return ArrayType::get(getPointerTransformedType(ATy->getElementType()),
+                                                    ATy->getNumElements());
+  } else {
+    assert(Ty->isPrimitiveType() && "Unknown derived type!");
+    return Ty;
+  }
+}
+
  namespace {
+  struct PoolInfo {
+    DSNode *Node;           // The node this pool allocation represents
+    Value  *Handle;         // LLVM value of the pool in the current context
+    const Type *NewType;    // The transformed type of the memory objects
+    const Type *PoolType;   // The type of the pool
+
+    const Type *getOldType() const { return Node->getType(); }
+
+    PoolInfo() {  // Define a default ctor for map::operator[]
+      cerr << "Map subscript used to get element that doesn't exist!\n";
+      abort();  // Invalid
+    }
+
+    PoolInfo(DSNode *N, Value *H, const Type *NT, const Type *PT)
+      : Node(N), Handle(H), NewType(NT), PoolType(PT) {
+      // Handle can be null...
+      assert(N && NT && PT && "Pool info null!");
+    }
+
+    PoolInfo(DSNode *N) : Node(N), Handle(0), NewType(0), PoolType(0) {
+      assert(N && "Invalid pool info!");
+
+      // The new type of the memory object is the same as the old type, except
+      // that all of the pointer values are replaced with POINTERTYPE values.
+      NewType = getPointerTransformedType(getOldType());
+    }
+  };
+
    // ScalarInfo - Information about an LLVM value that we know points to some
    // datastructure we are processing.
    //
    struct ScalarInfo {
      Value  *Val;            // Scalar value in Current Function
-    DSNode *Node;           // DataStructure node it points to
-    Value  *PoolHandle;     // PoolTy* LLVM value
+    PoolInfo Pool;          // The pool the scalar points into
      
-    ScalarInfo(Value *V, DSNode *N, Value *PH)
-      : Val(V), Node(N), PoolHandle(PH) {
-      assert(V && N && PH && "Null value passed to ScalarInfo ctor!");
+    ScalarInfo(Value *V, const PoolInfo &PI) : Val(V), Pool(PI) {
+      assert(V && "Null value passed to ScalarInfo ctor!");
      }
    };
  
@@ -64,9 +149,8 @@ namespace {
    //
    struct TransformFunctionInfo {
      // ArgInfo - Maintain information about the arguments that need to be
-    // processed.  Each pair corresponds to an argument (whose number is the
-    // first element) that needs to have a pool pointer (the second element)
-    // passed into the transformed function with it.
+    // processed.  Each CallArgInfo corresponds to an argument that needs to
+    // have a pool pointer passed into the transformed function with it.
      //
      // As a special case, "argument" number -1 corresponds to the return value.
      //
@@ -95,37 +179,70 @@ namespace {
        // argument records, in order.  Note that this must be a stable sort so
        // that the entries with the same sorting criteria (ie they are multiple
        // pool entries for the same argument) are kept in depth first order.
-      stable_sort(ArgInfo.begin(), ArgInfo.end());
+      std::stable_sort(ArgInfo.begin(), ArgInfo.end());
      }
+
+    // addCallInfo - For a specified function call CI, figure out which pool
+    // descriptors need to be passed in as arguments, and which arguments need
+    // to be transformed into indices.  If Arg != -1, the specified call
+    // argument is passed in as a pointer to a data structure.
+    //
+    void addCallInfo(DataStructure *DS, CallInst *CI, int Arg,
+                     DSNode *GraphNode, map<DSNode*, PoolInfo> &PoolDescs);
+
+    // Make sure that all dependant arguments are added to this transformation
+    // info.  For example, if we call foo(null, P) and foo treats it's first and
+    // second arguments as belonging to the same data structure, the we MUST add
+    // entries to know that the null needs to be transformed into an index as
+    // well.
+    //
+    void ensureDependantArgumentsIncluded(DataStructure *DS,
+                                          map<DSNode*, PoolInfo> &PoolDescs);
    };
  
  
    // Define the pass class that we implement...
-  class PoolAllocate : public Pass {
-    // PoolTy - The type of a scalar value that contains a pool pointer.
-    PointerType *PoolTy;
-  public:
-
+  struct PoolAllocate : public Pass {
      PoolAllocate() {
-      // Initialize the PoolTy instance variable, since the type never changes.
+      switch (ReqPointerSize) {
+      case Ptr32bits: POINTERTYPE = Type::UIntTy; break;
+      case Ptr16bits: POINTERTYPE = Type::UShortTy; break;
+      case Ptr8bits:  POINTERTYPE = Type::UByteTy; break;
+      }
+
+      CurModule = 0; DS = 0;
+      PoolInit = PoolDestroy = PoolAlloc = PoolFree = 0;
+    }
+
+    // getPoolType - Get the type used by the backend for a pool of a particular
+    // type.  This pool record is used to allocate nodes of type NodeType.
+    //
+    // Here, PoolTy = { NodeType*, sbyte*, uint }*
+    //
+    const StructType *getPoolType(const Type *NodeType) {
        vector<const Type*> PoolElements;
+      PoolElements.push_back(PointerType::get(NodeType));
        PoolElements.push_back(PointerType::get(Type::SByteTy));
        PoolElements.push_back(Type::UIntTy);
-      PoolTy = PointerType::get(StructType::get(PoolElements));
-      // PoolTy = { sbyte*, uint }*
+      StructType *Result = StructType::get(PoolElements);
  
-      CurModule = 0; DS = 0;
-      PoolInit = PoolDestroy = PoolAlloc = PoolFree = 0;
+      // Add a name to the symbol table to correspond to the backend
+      // representation of this pool...
+      assert(CurModule && "No current module!?");
+      string Name = CurModule->getTypeName(NodeType);
+      if (Name.empty()) Name = CurModule->getTypeName(PoolElements[0]);
+      CurModule->addTypeName(Name+"oolbe", Result);
+
+      return Result;
      }
  
-    bool run(Module *M);
+    bool run(Module &M);
  
-    // getAnalysisUsageInfo - This function requires data structure information
+    // getAnalysisUsage - This function requires data structure information
      // to be able to see what is pool allocatable.
      //
-    virtual void getAnalysisUsageInfo(Pass::AnalysisSet &Required,
-                                      Pass::AnalysisSet &,Pass::AnalysisSet &) {
-      Required.push_back(DataStructure::ID);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<DataStructure>();
      }
  
    public:
@@ -136,7 +253,7 @@ namespace {
      DataStructure *DS;
  
      // Prototypes that we add to support pool allocation...
-    Function *PoolInit, *PoolDestroy, *PoolAlloc, *PoolFree;
+    Function *PoolInit, *PoolDestroy, *PoolAlloc, *PoolAllocArray, *PoolFree;
  
      // The map of already transformed functions... note that the keys of this
      // map do not have meaningful values for 'Call' or the 'PoolHandle' elements
@@ -155,19 +272,20 @@ namespace {
      }
  
  
-    // addPoolPrototypes - Add prototypes for the pool methods to the specified
-    // module and update the Pool* instance variables to point to them.
+    // addPoolPrototypes - Add prototypes for the pool functions to the
+    // specified module and update the Pool* instance variables to point to
+    // them.
      //
-    void addPoolPrototypes(Module *M);
+    void addPoolPrototypes(Module &M);
  
  
      // CreatePools - Insert instructions into the function we are processing to
      // create all of the memory pool objects themselves.  This also inserts
      // destruction code.  Add an alloca for each pool that is allocated to the
-    // PoolDescriptors map.
+    // PoolDescs map.
      //
      void CreatePools(Function *F, const vector<AllocDSNode*> &Allocs,
-                     map<DSNode*, Value*> &PoolDescriptors);
+                     map<DSNode*, PoolInfo> &PoolDescs);
  
      // processFunction - Convert a function to use pool allocation where
      // available.
@@ -175,36 +293,33 @@ namespace {
      bool processFunction(Function *F);
  
      // transformFunctionBody - This transforms the instruction in 'F' to use the
-    // pools specified in PoolDescriptors when modifying data structure nodes
-    // specified in the PoolDescriptors map.  IPFGraph is the closed data
-    // structure graph for F, of which the PoolDescriptor nodes come from.
+    // pools specified in PoolDescs when modifying data structure nodes
+    // specified in the PoolDescs map.  IPFGraph is the closed data structure
+    // graph for F, of which the PoolDescriptor nodes come from.
      //
      void transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
-                               map<DSNode*, Value*> &PoolDescriptors);
+                               map<DSNode*, PoolInfo> &PoolDescs);
  
      // transformFunction - Transform the specified function the specified way.
      // It we have already transformed that function that way, don't do anything.
      // The nodes in the TransformFunctionInfo come out of callers data structure
-    // graph.
+    // graph, and the PoolDescs passed in are the caller's.
      //
      void transformFunction(TransformFunctionInfo &TFI,
-                           FunctionDSGraph &CallerIPGraph);
+                           FunctionDSGraph &CallerIPGraph,
+                           map<DSNode*, PoolInfo> &PoolDescs);
  
    };
-}
-
  
+  RegisterOpt<PoolAllocate> X("poolalloc",
+                              "Pool allocate disjoint datastructures");
+}
  
  // isNotPoolableAlloc - This is a predicate that returns true if the specified
  // allocation node in a data structure graph is eligable for pool allocation.
  //
  static bool isNotPoolableAlloc(const AllocDSNode *DS) {
    if (DS->isAllocaNode()) return true;  // Do not pool allocate alloca's.
-
-  MallocInst *MI = cast<MallocInst>(DS->getAllocation());
-  if (MI->isArrayAllocation() && !isa<Constant>(MI->getArraySize()))
-    return true;   // Do not allow variable size allocations...
-
    return false;
  }
  
@@ -229,202 +344,746 @@ bool PoolAllocate::processFunction(Function *F) {
    // variable sized array allocations and alloca's (which we do not want to
    // pool allocate)
    //
-  Allocs.erase(remove_if(Allocs.begin(), Allocs.end(), isNotPoolableAlloc),
+  Allocs.erase(std::remove_if(Allocs.begin(), Allocs.end(), isNotPoolableAlloc),
                 Allocs.end());
  
  
    if (Allocs.empty()) return false;  // Nothing to do.
  
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "Transforming Function: " << F->getName() << "\n";
+#endif
+
    // Insert instructions into the function we are processing to create all of
    // the memory pool objects themselves.  This also inserts destruction code.
-  // This fills in the PoolDescriptors map to associate the alloc node with the
+  // This fills in the PoolDescs map to associate the alloc node with the
    // allocation of the memory pool corresponding to it.
    // 
-  map<DSNode*, Value*> PoolDescriptors;
-  CreatePools(F, Allocs, PoolDescriptors);
+  map<DSNode*, PoolInfo> PoolDescs;
+  CreatePools(F, Allocs, PoolDescs);
+
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "Transformed Entry Function: \n" << F;
+#endif
  
-  // Now we need to figure out what called methods we need to transform, and
+  // Now we need to figure out what called functions we need to transform, and
    // how.  To do this, we look at all of the scalars, seeing which functions are
    // either used as a scalar value (so they return a data structure), or are
    // passed one of our scalar values.
    //
-  transformFunctionBody(F, IPGraph, PoolDescriptors);
+  transformFunctionBody(F, IPGraph, PoolDescs);
  
    return true;
  }
  
  
-class FunctionBodyTransformer : public InstVisitor<FunctionBodyTransformer> {
+//===----------------------------------------------------------------------===//
+//
+// NewInstructionCreator - This class is used to traverse the function being
+// modified, changing each instruction visit'ed to use and provide pointer
+// indexes instead of real pointers.  This is what changes the body of a
+// function to use pool allocation.
+//
+class NewInstructionCreator : public InstVisitor<NewInstructionCreator> {
    PoolAllocate &PoolAllocator;
    vector<ScalarInfo> &Scalars;
    map<CallInst*, TransformFunctionInfo> &CallMap;
+  map<Value*, Value*> &XFormMap;   // Map old pointers to new indexes
+
+  struct RefToUpdate {
+    Instruction *I;       // Instruction to update
+    unsigned     OpNum;   // Operand number to update
+    Value       *OldVal;  // The old value it had
+
+    RefToUpdate(Instruction *i, unsigned o, Value *ov)
+      : I(i), OpNum(o), OldVal(ov) {}
+  };
+  vector<RefToUpdate> ReferencesToUpdate;
  
-  const ScalarInfo &getScalar(const Value *V) {
+  const ScalarInfo &getScalarRef(const Value *V) {
      for (unsigned i = 0, e = Scalars.size(); i != e; ++i)
        if (Scalars[i].Val == V) return Scalars[i];
+
+    cerr << "Could not find scalar " << V << " in scalar map!\n";
      assert(0 && "Scalar not found in getScalar!");
      abort();
      return Scalars[0];
    }
-
-  // updateScalars - Map the scalars array entries that look like 'From' to look
-  // like 'To'.
-  //
-  void updateScalars(Value *From, Value *To) {
+  
+  const ScalarInfo *getScalar(const Value *V) {
      for (unsigned i = 0, e = Scalars.size(); i != e; ++i)
-      if (Scalars[i].Val == From) Scalars[i].Val = To;
+      if (Scalars[i].Val == V) return &Scalars[i];
+    return 0;
    }
  
-public:
-  FunctionBodyTransformer(PoolAllocate &PA, vector<ScalarInfo> &S,
-                          map<CallInst*, TransformFunctionInfo> &C)
-    : PoolAllocator(PA), Scalars(S), CallMap(C) {}
+  BasicBlock::iterator ReplaceInstWith(Instruction &I, Instruction *New) {
+    BasicBlock *BB = I.getParent();
+    BasicBlock::iterator RI = &I;
+    BB->getInstList().remove(RI);
+    BB->getInstList().insert(RI, New);
+    XFormMap[&I] = New;
+    return New;
+  }
  
-  void visitMemAccessInst(MemAccessInst *MAI) {
-    // Don't do anything to load, store, or GEP yet...
+  Instruction *createPoolBaseInstruction(Value *PtrVal) {
+    const ScalarInfo &SC = getScalarRef(PtrVal);
+    vector<Value*> Args(3);
+    Args[0] = ConstantUInt::get(Type::UIntTy, 0);  // No pointer offset
+    Args[1] = ConstantUInt::get(Type::UByteTy, 0); // Field #0 of pool descriptr
+    Args[2] = ConstantUInt::get(Type::UByteTy, 0); // Field #0 of poolalloc val
+    return  new LoadInst(SC.Pool.Handle, Args, PtrVal->getName()+".poolbase");
    }
  
-  // Convert a malloc instruction into a call to poolalloc
-  void visitMallocInst(MallocInst *I) {
-    const ScalarInfo &SC = getScalar(I);
-    BasicBlock *BB = I->getParent();
-    BasicBlock::iterator MI = find(BB->begin(), BB->end(), I);
-    BB->getInstList().remove(MI);  // Remove the Malloc instruction from the BB
  
-    // Create a new call to poolalloc before the malloc instruction
-    vector<Value*> Args;
-    Args.push_back(SC.PoolHandle);
-    CallInst *Call = new CallInst(PoolAllocator.PoolAlloc, Args, I->getName());
-    MI = BB->getInstList().insert(MI, Call)+1;
-
-    // If the type desired is not void*, cast it now...
-    Value *Ptr = Call;
-    if (Call->getType() != I->getType()) {
-      CastInst *CI = new CastInst(Ptr, I->getType(), I->getName());
-      BB->getInstList().insert(MI, CI);
-      Ptr = CI;
+public:
+  NewInstructionCreator(PoolAllocate &PA, vector<ScalarInfo> &S,
+                        map<CallInst*, TransformFunctionInfo> &C,
+                        map<Value*, Value*> &X)
+    : PoolAllocator(PA), Scalars(S), CallMap(C), XFormMap(X) {}
+
+
+  // updateReferences - The NewInstructionCreator is responsible for creating
+  // new instructions to replace the old ones in the function, and then link up
+  // references to values to their new values.  For it to do this, however, it
+  // keeps track of information about the value mapping of old values to new
+  // values that need to be patched up.  Given this value map and a set of
+  // instruction operands to patch, updateReferences performs the updates.
+  //
+  void updateReferences() {
+    for (unsigned i = 0, e = ReferencesToUpdate.size(); i != e; ++i) {
+      RefToUpdate &Ref = ReferencesToUpdate[i];
+      Value *NewVal = XFormMap[Ref.OldVal];
+
+      if (NewVal == 0) {
+        if (isa<Constant>(Ref.OldVal) &&  // Refering to a null ptr?
+            cast<Constant>(Ref.OldVal)->isNullValue()) {
+          // Transform the null pointer into a null index... caching in XFormMap
+          XFormMap[Ref.OldVal] = NewVal = Constant::getNullValue(POINTERTYPE);
+          //} else if (isa<Argument>(Ref.OldVal)) {
+        } else {
+          cerr << "Unknown reference to: " << Ref.OldVal << "\n";
+          assert(XFormMap[Ref.OldVal] &&
+                 "Reference to value that was not updated found!");
+        }
+      }
+        
+      Ref.I->setOperand(Ref.OpNum, NewVal);
      }
+    ReferencesToUpdate.clear();
+  }
  
-    // Change everything that used the malloc to now use the pool alloc...
-    I->replaceAllUsesWith(Ptr);
+  //===--------------------------------------------------------------------===//
+  // Transformation methods:
+  //   These methods specify how each type of instruction is transformed by the
+  // NewInstructionCreator instance...
+  //===--------------------------------------------------------------------===//
  
-    // Update the scalars array...
-    updateScalars(I, Ptr);
+  void visitGetElementPtrInst(GetElementPtrInst &I) {
+    assert(0 && "Cannot transform get element ptr instructions yet!");
+  }
+
+  // Replace the load instruction with a new one.
+  void visitLoadInst(LoadInst &I) {
+    vector<Instruction *> BeforeInsts;
+
+    // Cast our index to be a UIntTy so we can use it to index into the pool...
+    CastInst *Index = new CastInst(Constant::getNullValue(POINTERTYPE),
+                                   Type::UIntTy, I.getOperand(0)->getName());
+    BeforeInsts.push_back(Index);
+    ReferencesToUpdate.push_back(RefToUpdate(Index, 0, I.getOperand(0)));
+    
+    // Include the pool base instruction...
+    Instruction *PoolBase = createPoolBaseInstruction(I.getOperand(0));
+    BeforeInsts.push_back(PoolBase);
+
+    Instruction *IdxInst =
+      BinaryOperator::create(Instruction::Add, *I.idx_begin(), Index,
+                             I.getName()+".idx");
+    BeforeInsts.push_back(IdxInst);
+
+    vector<Value*> Indices(I.idx_begin(), I.idx_end());
+    Indices[0] = IdxInst;
+    Instruction *Address = new GetElementPtrInst(PoolBase, Indices,
+                                                 I.getName()+".addr");
+    BeforeInsts.push_back(Address);
  
-    // Delete the instruction now.
-    delete I;
+    Instruction *NewLoad = new LoadInst(Address, I.getName());
+
+    // Replace the load instruction with the new load instruction...
+    BasicBlock::iterator II = ReplaceInstWith(I, NewLoad);
+
+    // Add all of the instructions before the load...
+    NewLoad->getParent()->getInstList().insert(II, BeforeInsts.begin(),
+                                               BeforeInsts.end());
+
+    // If not yielding a pool allocated pointer, use the new load value as the
+    // value in the program instead of the old load value...
+    //
+    if (!getScalar(&I))
+      I.replaceAllUsesWith(NewLoad);
    }
  
-  // Convert the free instruction into a call to poolfree
-  void visitFreeInst(FreeInst *I) {
-    Value *Ptr = I->getOperand(0);
-    const ScalarInfo &SC = getScalar(Ptr);
-    BasicBlock *BB = I->getParent();
-    BasicBlock::iterator FI = find(BB->begin(), BB->end(), I);
+  // Replace the store instruction with a new one.  In the store instruction,
+  // the value stored could be a pointer type, meaning that the new store may
+  // have to change one or both of it's operands.
+  //
+  void visitStoreInst(StoreInst &I) {
+    assert(getScalar(I.getOperand(1)) &&
+           "Store inst found only storing pool allocated pointer.  "
+           "Not imp yet!");
  
-    // If the value is not an sbyte*, convert it now!
-    if (Ptr->getType() != PointerType::get(Type::SByteTy)) {
-      CastInst *CI = new CastInst(Ptr, PointerType::get(Type::SByteTy),
-                                  Ptr->getName());
-      FI = BB->getInstList().insert(FI, CI)+1;
-      Ptr = CI;
-    }
+    Value *Val = I.getOperand(0);  // The value to store...
  
-    // Create a new call to poolfree before the free instruction
+    // Check to see if the value we are storing is a data structure pointer...
+    //if (const ScalarInfo *ValScalar = getScalar(I.getOperand(0)))
+    if (isa<PointerType>(I.getOperand(0)->getType()))
+      Val = Constant::getNullValue(POINTERTYPE);  // Yes, store a dummy
+
+    Instruction *PoolBase = createPoolBaseInstruction(I.getOperand(1));
+
+    // Cast our index to be a UIntTy so we can use it to index into the pool...
+    CastInst *Index = new CastInst(Constant::getNullValue(POINTERTYPE),
+                                   Type::UIntTy, I.getOperand(1)->getName());
+    ReferencesToUpdate.push_back(RefToUpdate(Index, 0, I.getOperand(1)));
+
+    // Instructions to add after the Index...
+    vector<Instruction*> AfterInsts;
+
+    Instruction *IdxInst =
+      BinaryOperator::create(Instruction::Add, *I.idx_begin(), Index, "idx");
+    AfterInsts.push_back(IdxInst);
+
+    vector<Value*> Indices(I.idx_begin(), I.idx_end());
+    Indices[0] = IdxInst;
+    Instruction *Address = new GetElementPtrInst(PoolBase, Indices,
+                                                 I.getName()+"storeaddr");
+    AfterInsts.push_back(Address);
+
+    Instruction *NewStore = new StoreInst(Val, Address);
+    AfterInsts.push_back(NewStore);
+    if (Val != I.getOperand(0))    // Value stored was a pointer?
+      ReferencesToUpdate.push_back(RefToUpdate(NewStore, 0, I.getOperand(0)));
+
+
+    // Replace the store instruction with the cast instruction...
+    BasicBlock::iterator II = ReplaceInstWith(I, Index);
+
+    // Add the pool base calculator instruction before the index...
+    II = ++Index->getParent()->getInstList().insert(II, PoolBase);
+    ++II;
+
+    // Add the instructions that go after the index...
+    Index->getParent()->getInstList().insert(II, AfterInsts.begin(),
+                                             AfterInsts.end());
+  }
+
+
+  // Create call to poolalloc for every malloc instruction
+  void visitMallocInst(MallocInst &I) {
+    const ScalarInfo &SCI = getScalarRef(&I);
      vector<Value*> Args;
-    Args.push_back(SC.PoolHandle);
-    Args.push_back(Ptr);
-    CallInst *Call = new CallInst(PoolAllocator.PoolFree, Args);
-    FI = BB->getInstList().insert(FI, Call)+1;
  
-    // Remove the old free instruction...
-    delete BB->getInstList().remove(FI);
+    CallInst *Call;
+    if (!I.isArrayAllocation()) {
+      Args.push_back(SCI.Pool.Handle);
+      Call = new CallInst(PoolAllocator.PoolAlloc, Args, I.getName());
+    } else {
+      Args.push_back(I.getArraySize());
+      Args.push_back(SCI.Pool.Handle);
+      Call = new CallInst(PoolAllocator.PoolAllocArray, Args, I.getName());
+    }    
+
+    ReplaceInstWith(I, Call);
+  }
+
+  // Convert a call to poolfree for every free instruction...
+  void visitFreeInst(FreeInst &I) {
+    // Create a new call to poolfree before the free instruction
+    vector<Value*> Args;
+    Args.push_back(Constant::getNullValue(POINTERTYPE));
+    Args.push_back(getScalarRef(I.getOperand(0)).Pool.Handle);
+    Instruction *NewCall = new CallInst(PoolAllocator.PoolFree, Args);
+    ReplaceInstWith(I, NewCall);
+    ReferencesToUpdate.push_back(RefToUpdate(NewCall, 1, I.getOperand(0)));
    }
  
    // visitCallInst - Create a new call instruction with the extra arguments for
    // all of the memory pools that the call needs.
    //
-  void visitCallInst(CallInst *I) {
-    TransformFunctionInfo &TI = CallMap[I];
-    BasicBlock *BB = I->getParent();
-    BasicBlock::iterator CI = find(BB->begin(), BB->end(), I);
-    BB->getInstList().remove(CI);  // Remove the old call instruction
+  void visitCallInst(CallInst &I) {
+    TransformFunctionInfo &TI = CallMap[&I];
  
      // Start with all of the old arguments...
-    vector<Value*> Args(I->op_begin()+1, I->op_end());
+    vector<Value*> Args(I.op_begin()+1, I.op_end());
  
-    // Add all of the pool arguments...
-    for (unsigned i = 0, e = TI.ArgInfo.size(); i != e; ++i)
+    for (unsigned i = 0, e = TI.ArgInfo.size(); i != e; ++i) {
+      // Replace all of the pointer arguments with our new pointer typed values.
+      if (TI.ArgInfo[i].ArgNo != -1)
+        Args[TI.ArgInfo[i].ArgNo] = Constant::getNullValue(POINTERTYPE);
+
+      // Add all of the pool arguments...
        Args.push_back(TI.ArgInfo[i].PoolHandle);
+    }
      
      Function *NF = PoolAllocator.getTransformedFunction(TI);
-    CallInst *NewCall = new CallInst(NF, Args, I->getName());
-    BB->getInstList().insert(CI, NewCall);
+    Instruction *NewCall = new CallInst(NF, Args, I.getName());
+    ReplaceInstWith(I, NewCall);
  
-    // Change everything that used the malloc to now use the pool alloc...
-    if (I->getType() != Type::VoidTy) {
-      I->replaceAllUsesWith(NewCall);
+    // Keep track of the mapping of operands so that we can resolve them to real
+    // values later.
+    Value *RetVal = NewCall;
+    for (unsigned i = 0, e = TI.ArgInfo.size(); i != e; ++i)
+      if (TI.ArgInfo[i].ArgNo != -1)
+        ReferencesToUpdate.push_back(RefToUpdate(NewCall, TI.ArgInfo[i].ArgNo+1,
+                                        I.getOperand(TI.ArgInfo[i].ArgNo+1)));
+      else
+        RetVal = 0;   // If returning a pointer, don't change retval...
+
+    // If not returning a pointer, use the new call as the value in the program
+    // instead of the old call...
+    //
+    if (RetVal)
+      I.replaceAllUsesWith(RetVal);
+  }
  
-      // Update the scalars array...
-      updateScalars(I, NewCall);
+  // visitPHINode - Create a new PHI node of POINTERTYPE for all of the old Phi
+  // nodes...
+  //
+  void visitPHINode(PHINode &PN) {
+    Value *DummyVal = Constant::getNullValue(POINTERTYPE);
+    PHINode *NewPhi = new PHINode(POINTERTYPE, PN.getName());
+    for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+      NewPhi->addIncoming(DummyVal, PN.getIncomingBlock(i));
+      ReferencesToUpdate.push_back(RefToUpdate(NewPhi, i*2, 
+                                               PN.getIncomingValue(i)));
      }
  
-    delete I;  // Delete the old call instruction now...
+    ReplaceInstWith(PN, NewPhi);
    }
  
-  void visitPHINode(PHINode *PN) {
-    // Handle PHI Node
+  // visitReturnInst - Replace ret instruction with a new return...
+  void visitReturnInst(ReturnInst &I) {
+    Instruction *Ret = new ReturnInst(Constant::getNullValue(POINTERTYPE));
+    ReplaceInstWith(I, Ret);
+    ReferencesToUpdate.push_back(RefToUpdate(Ret, 0, I.getOperand(0)));
    }
  
-  void visitReturnInst(ReturnInst *I) {
-    // Nothing of interest
+  // visitSetCondInst - Replace a conditional test instruction with a new one
+  void visitSetCondInst(SetCondInst &SCI) {
+    BinaryOperator &I = (BinaryOperator&)SCI;
+    Value *DummyVal = Constant::getNullValue(POINTERTYPE);
+    BinaryOperator *New = BinaryOperator::create(I.getOpcode(), DummyVal,
+                                                 DummyVal, I.getName());
+    ReplaceInstWith(I, New);
+
+    ReferencesToUpdate.push_back(RefToUpdate(New, 0, I.getOperand(0)));
+    ReferencesToUpdate.push_back(RefToUpdate(New, 1, I.getOperand(1)));
+
+    // Make sure branches refer to the new condition...
+    I.replaceAllUsesWith(New);
    }
  
-  void visitSetCondInst(SetCondInst *SCI) {
-    // hrm, notice a pattern?
+  void visitInstruction(Instruction &I) {
+    cerr << "Unknown instruction to FunctionBodyTransformer:\n" << I;
+  }
+};
+
+
+// PoolBaseLoadEliminator - Every load and store through a pool allocated
+// pointer causes a load of the real pool base out of the pool descriptor.
+// Iterate through the function, doing a local elimination pass of duplicate
+// loads.  This attempts to turn the all too common:
+//
+// %reg109.poolbase22 = load %root.pool* %root.pool, uint 0, ubyte 0, ubyte 0
+// %reg207 = load %root.p* %reg109.poolbase22, uint %reg109, ubyte 0, ubyte 0
+// %reg109.poolbase23 = load %root.pool* %root.pool, uint 0, ubyte 0, ubyte 0
+// store double %reg207, %root.p* %reg109.poolbase23, uint %reg109, ...
+//
+// into:
+// %reg109.poolbase22 = load %root.pool* %root.pool, uint 0, ubyte 0, ubyte 0
+// %reg207 = load %root.p* %reg109.poolbase22, uint %reg109, ubyte 0, ubyte 0
+// store double %reg207, %root.p* %reg109.poolbase22, uint %reg109, ...
+//
+//
+class PoolBaseLoadEliminator : public InstVisitor<PoolBaseLoadEliminator> {
+  // PoolDescValues - Keep track of the values in the current function that are
+  // pool descriptors (loads from which we want to eliminate).
+  //
+  vector<Value*>      PoolDescValues;
+
+  // PoolDescMap - As we are analyzing a BB, keep track of which load to use
+  // when referencing a pool descriptor.
+  //
+  map<Value*, LoadInst*> PoolDescMap;
+
+  // These two fields keep track of statistics of how effective we are, if
+  // debugging is enabled.
+  //
+  unsigned Eliminated, Remaining;
+public:
+  // Compact the pool descriptor map into a list of the pool descriptors in the
+  // current context that we should know about...
+  //
+  PoolBaseLoadEliminator(const map<DSNode*, PoolInfo> &PoolDescs) {
+    Eliminated = Remaining = 0;
+    for (map<DSNode*, PoolInfo>::const_iterator I = PoolDescs.begin(),
+           E = PoolDescs.end(); I != E; ++I)
+      PoolDescValues.push_back(I->second.Handle);
+    
+    // Remove duplicates from the list of pool values
+    sort(PoolDescValues.begin(), PoolDescValues.end());
+    PoolDescValues.erase(unique(PoolDescValues.begin(), PoolDescValues.end()),
+                         PoolDescValues.end());
+  }
+
+#ifdef DEBUG_POOLBASE_LOAD_ELIMINATOR
+  void visitFunction(Function &F) {
+    cerr << "Pool Load Elim '" << F.getName() << "'\t";
+  }
+  ~PoolBaseLoadEliminator() {
+    unsigned Total = Eliminated+Remaining;
+    if (Total)
+      cerr << "removed " << Eliminated << "["
+           << Eliminated*100/Total << "%] loads, leaving "
+           << Remaining << ".\n";
+  }
+#endif
+
+  // Loop over the function, looking for loads to eliminate.  Because we are a
+  // local transformation, we reset all of our state when we enter a new basic
+  // block.
+  //
+  void visitBasicBlock(BasicBlock &) {
+    PoolDescMap.clear();  // Forget state.
    }
  
-  void visitInstruction(Instruction *I) {
-    cerr << "Unknown instruction to FunctionBodyTransformer:\n";
-    I->dump();
+  // Starting with an empty basic block, we scan it looking for loads of the
+  // pool descriptor.  When we find a load, we add it to the PoolDescMap,
+  // indicating that we have a value available to recycle next time we see the
+  // poolbase of this instruction being loaded.
+  //
+  void visitLoadInst(LoadInst &LI) {
+    Value *LoadAddr = LI.getPointerOperand();
+    map<Value*, LoadInst*>::iterator VIt = PoolDescMap.find(LoadAddr);
+    if (VIt != PoolDescMap.end()) {  // We already have a value for this load?
+      LI.replaceAllUsesWith(VIt->second);   // Make the current load dead
+      ++Eliminated;
+    } else {
+      // This load might not be a load of a pool pointer, check to see if it is
+      if (LI.getNumOperands() == 4 &&  // load pool, uint 0, ubyte 0, ubyte 0
+          find(PoolDescValues.begin(), PoolDescValues.end(), LoadAddr) !=
+          PoolDescValues.end()) {
+
+        assert("Make sure it's a load of the pool base, not a chaining field" &&
+               LI.getOperand(1) == Constant::getNullValue(Type::UIntTy) &&
+               LI.getOperand(2) == Constant::getNullValue(Type::UByteTy) &&
+               LI.getOperand(3) == Constant::getNullValue(Type::UByteTy));
+
+        // If it is a load of a pool base, keep track of it for future reference
+        PoolDescMap.insert(std::make_pair(LoadAddr, &LI));
+        ++Remaining;
+      }
+    }
    }
  
+  // If we run across a function call, forget all state...  Calls to
+  // poolalloc/poolfree can invalidate the pool base pointer, so it should be
+  // reloaded the next time it is used.  Furthermore, a call to a random
+  // function might call one of these functions, so be conservative.  Through
+  // more analysis, this could be improved in the future.
+  //
+  void visitCallInst(CallInst &) {
+    PoolDescMap.clear();
+  }
  };
  
+static void addNodeMapping(DSNode *SrcNode, const PointerValSet &PVS,
+                           map<DSNode*, PointerValSet> &NodeMapping) {
+  for (unsigned i = 0, e = PVS.size(); i != e; ++i)
+    if (NodeMapping[SrcNode].add(PVS[i])) {  // Not in map yet?
+      assert(PVS[i].Index == 0 && "Node indexing not supported yet!");
+      DSNode *DestNode = PVS[i].Node;
+
+      // Loop over all of the outgoing links in the mapped graph
+      for (unsigned l = 0, le = DestNode->getNumOutgoingLinks(); l != le; ++l) {
+        PointerValSet &SrcSet = SrcNode->getOutgoingLink(l);
+        const PointerValSet &DestSet = DestNode->getOutgoingLink(l);
+
+        // Add all of the node mappings now!
+        for (unsigned si = 0, se = SrcSet.size(); si != se; ++si) {
+          assert(SrcSet[si].Index == 0 && "Can't handle node offset!");
+          addNodeMapping(SrcSet[si].Node, DestSet, NodeMapping);
+        }
+      }
+    }
+}
+
+// CalculateNodeMapping - There is a partial isomorphism between the graph
+// passed in and the graph that is actually used by the function.  We need to
+// figure out what this mapping is so that we can transformFunctionBody the
+// instructions in the function itself.  Note that every node in the graph that
+// we are interested in must be both in the local graph of the called function,
+// and in the local graph of the calling function.  Because of this, we only
+// define the mapping for these nodes [conveniently these are the only nodes we
+// CAN define a mapping for...]
+//
+// The roots of the graph that we are transforming is rooted in the arguments
+// passed into the function from the caller.  This is where we start our
+// mapping calculation.
+//
+// The NodeMapping calculated maps from the callers graph to the called graph.
+//
+static void CalculateNodeMapping(Function *F, TransformFunctionInfo &TFI,
+                                 FunctionDSGraph &CallerGraph,
+                                 FunctionDSGraph &CalledGraph, 
+                                 map<DSNode*, PointerValSet> &NodeMapping) {
+  int LastArgNo = -2;
+  for (unsigned i = 0, e = TFI.ArgInfo.size(); i != e; ++i) {
+    // Figure out what nodes in the called graph the TFI.ArgInfo[i].Node node
+    // corresponds to...
+    //
+    // Only consider first node of sequence.  Extra nodes may may be added
+    // to the TFI if the data structure requires more nodes than just the
+    // one the argument points to.  We are only interested in the one the
+    // argument points to though.
+    //
+    if (TFI.ArgInfo[i].ArgNo != LastArgNo) {
+      if (TFI.ArgInfo[i].ArgNo == -1) {
+        addNodeMapping(TFI.ArgInfo[i].Node, CalledGraph.getRetNodes(),
+                       NodeMapping);
+      } else {
+        // Figure out which node argument # ArgNo points to in the called graph.
+        Function::aiterator AI = F->abegin();
+        std::advance(AI, TFI.ArgInfo[i].ArgNo);
+        addNodeMapping(TFI.ArgInfo[i].Node, CalledGraph.getValueMap()[AI],
+                       NodeMapping);
+      }
+      LastArgNo = TFI.ArgInfo[i].ArgNo;
+    }
+  }
+}
+
+
  
-static void addCallInfo(DataStructure *DS,
-                        TransformFunctionInfo &TFI, CallInst *CI, int Arg, 
-                        DSNode *GraphNode,
-                        map<DSNode*, Value*> &PoolDescriptors) {
+
+// addCallInfo - For a specified function call CI, figure out which pool
+// descriptors need to be passed in as arguments, and which arguments need to be
+// transformed into indices.  If Arg != -1, the specified call argument is
+// passed in as a pointer to a data structure.
+//
+void TransformFunctionInfo::addCallInfo(DataStructure *DS, CallInst *CI,
+                                        int Arg, DSNode *GraphNode,
+                                        map<DSNode*, PoolInfo> &PoolDescs) {
    assert(CI->getCalledFunction() && "Cannot handle indirect calls yet!");
-  assert(TFI.Func == 0 || TFI.Func == CI->getCalledFunction() &&
+  assert(Func == 0 || Func == CI->getCalledFunction() &&
           "Function call record should always call the same function!");
-  assert(TFI.Call == 0 || TFI.Call == CI &&
+  assert(Call == 0 || Call == CI &&
           "Call element already filled in with different value!");
-  TFI.Func = CI->getCalledFunction();
-  TFI.Call = CI;
-  //FunctionDSGraph &CalledGraph = DS->getClosedDSGraph(TFI.Func);
+  Func = CI->getCalledFunction();
+  Call = CI;
+  //FunctionDSGraph &CalledGraph = DS->getClosedDSGraph(Func);
  
    // For now, add the entire graph that is pointed to by the call argument.
    // This graph can and should be pruned to only what the function itself will
    // use, because often this will be a dramatically smaller subset of what we
    // are providing.
    //
+  // FIXME: This should use pool links instead of extra arguments!
+  //
    for (df_iterator<DSNode*> I = df_begin(GraphNode), E = df_end(GraphNode);
-       I != E; ++I) {
-    TFI.ArgInfo.push_back(CallArgInfo(Arg, *I, PoolDescriptors[*I]));
+       I != E; ++I)
+    ArgInfo.push_back(CallArgInfo(Arg, *I, PoolDescs[*I].Handle));
+}
+
+static void markReachableNodes(const PointerValSet &Vals,
+                               set<DSNode*> &ReachableNodes) {
+  for (unsigned n = 0, ne = Vals.size(); n != ne; ++n) {
+    DSNode *N = Vals[n].Node;
+    if (ReachableNodes.count(N) == 0)   // Haven't already processed node?
+      ReachableNodes.insert(df_begin(N), df_end(N)); // Insert all
    }
  }
  
+// Make sure that all dependant arguments are added to this transformation info.
+// For example, if we call foo(null, P) and foo treats it's first and second
+// arguments as belonging to the same data structure, the we MUST add entries to
+// know that the null needs to be transformed into an index as well.
+//
+void TransformFunctionInfo::ensureDependantArgumentsIncluded(DataStructure *DS,
+                                           map<DSNode*, PoolInfo> &PoolDescs) {
+  // FIXME: This does not work for indirect function calls!!!
+  if (Func == 0) return;  // FIXME!
+
+  // Make sure argument entries are sorted.
+  finalizeConstruction();
+
+  // Loop over the function signature, checking to see if there are any pointer
+  // arguments that we do not convert...  if there is something we haven't
+  // converted, set done to false.
+  //
+  unsigned PtrNo = 0;
+  bool Done = true;
+  if (isa<PointerType>(Func->getReturnType()))    // Make sure we convert retval
+    if (PtrNo < ArgInfo.size() && ArgInfo[PtrNo++].ArgNo == -1) {
+      // We DO transform the ret val... skip all possible entries for retval
+      while (PtrNo < ArgInfo.size() && ArgInfo[PtrNo].ArgNo == -1)
+        PtrNo++;
+    } else {
+      Done = false;
+    }
+
+  unsigned i = 0;
+  for (Function::aiterator I = Func->abegin(), E = Func->aend(); I!=E; ++I,++i){
+    if (isa<PointerType>(I->getType())) {
+      if (PtrNo < ArgInfo.size() && ArgInfo[PtrNo++].ArgNo == (int)i) {
+        // We DO transform this arg... skip all possible entries for argument
+        while (PtrNo < ArgInfo.size() && ArgInfo[PtrNo].ArgNo == (int)i)
+          PtrNo++;
+      } else {
+        Done = false;
+        break;
+      }
+    }
+  }
+
+  // If we already have entries for all pointer arguments and retvals, there
+  // certainly is no work to do.  Bail out early to avoid building relatively
+  // expensive data structures.
+  //
+  if (Done) return;
+
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "Must ensure dependant arguments for: " << Func->getName() << "\n";
+#endif
+
+  // Otherwise, we MIGHT have to add the arguments/retval if they are part of
+  // the same datastructure graph as some other argument or retval that we ARE
+  // processing.
+  //
+  // Get the data structure graph for the called function.
+  //
+  FunctionDSGraph &CalledDS = DS->getClosedDSGraph(Func);
+
+  // Build a mapping between the nodes in our current graph and the nodes in the
+  // called function's graph.  We build it based on our _incomplete_
+  // transformation information, because it contains all of the info that we
+  // should need.
+  //
+  map<DSNode*, PointerValSet> NodeMapping;
+  CalculateNodeMapping(Func, *this,
+                       DS->getClosedDSGraph(Call->getParent()->getParent()),
+                       CalledDS, NodeMapping);
+
+  // Build the inverted version of the node mapping, that maps from a node in
+  // the called functions graph to a single node in the caller graph.
+  // 
+  map<DSNode*, DSNode*> InverseNodeMap;
+  for (map<DSNode*, PointerValSet>::iterator I = NodeMapping.begin(),
+         E = NodeMapping.end(); I != E; ++I) {
+    PointerValSet &CalledNodes = I->second;
+    for (unsigned i = 0, e = CalledNodes.size(); i != e; ++i)
+      InverseNodeMap[CalledNodes[i].Node] = I->first;
+  }
+  NodeMapping.clear();  // Done with information, free memory
+  
+  // Build a set of reachable nodes from the arguments/retval that we ARE
+  // passing in...
+  set<DSNode*> ReachableNodes;
+
+  // Loop through all of the arguments, marking all of the reachable data
+  // structure nodes reachable if they are from this pointer...
+  //
+  for (unsigned i = 0, e = ArgInfo.size(); i != e; ++i) {
+    if (ArgInfo[i].ArgNo == -1) {
+      if (i == 0)   // Only process retvals once (performance opt)
+        markReachableNodes(CalledDS.getRetNodes(), ReachableNodes);
+    } else {  // If it's an argument value...
+      Function::aiterator AI = Func->abegin();
+      std::advance(AI, ArgInfo[i].ArgNo);
+      if (isa<PointerType>(AI->getType()))
+        markReachableNodes(CalledDS.getValueMap()[AI], ReachableNodes);
+    }
+  }
+
+  // Now that we know which nodes are already reachable, see if any of the
+  // arguments that we are not passing values in for can reach one of the
+  // existing nodes...
+  //
+
+  // <FIXME> IN THEORY, we should allow arbitrary paths from the argument to
+  // nodes we know about.  The problem is that if we do this, then I don't know
+  // how to get pool pointers for this head list.  Since we are completely
+  // deadline driven, I'll just allow direct accesses to the graph. </FIXME>
+  //
+  
+  PtrNo = 0;
+  if (isa<PointerType>(Func->getReturnType()))    // Make sure we convert retval
+    if (PtrNo < ArgInfo.size() && ArgInfo[PtrNo++].ArgNo == -1) {
+      // We DO transform the ret val... skip all possible entries for retval
+      while (PtrNo < ArgInfo.size() && ArgInfo[PtrNo].ArgNo == -1)
+        PtrNo++;
+    } else {
+      // See what the return value points to...
+
+      // FIXME: This should generalize to any number of nodes, just see if any
+      // are reachable.
+      assert(CalledDS.getRetNodes().size() == 1 &&
+             "Assumes only one node is returned");
+      DSNode *N = CalledDS.getRetNodes()[0].Node;
+      
+      // If the return value is not marked as being passed in, but it NEEDS to
+      // be transformed, then make it known now.
+      //
+      if (ReachableNodes.count(N)) {
+#ifdef DEBUG_TRANSFORM_PROGRESS
+        cerr << "ensure dependant arguments adds return value entry!\n";
+#endif
+        addCallInfo(DS, Call, -1, InverseNodeMap[N], PoolDescs);
+
+        // Keep sorted!
+        finalizeConstruction();
+      }
+    }
+
+  i = 0;
+  for (Function::aiterator I = Func->abegin(), E = Func->aend(); I!=E; ++I, ++i)
+    if (isa<PointerType>(I->getType())) {
+      if (PtrNo < ArgInfo.size() && ArgInfo[PtrNo++].ArgNo == (int)i) {
+        // We DO transform this arg... skip all possible entries for argument
+        while (PtrNo < ArgInfo.size() && ArgInfo[PtrNo].ArgNo == (int)i)
+          PtrNo++;
+      } else {
+        // This should generalize to any number of nodes, just see if any are
+        // reachable.
+        assert(CalledDS.getValueMap()[I].size() == 1 &&
+               "Only handle case where pointing to one node so far!");
+
+        // If the arg is not marked as being passed in, but it NEEDS to
+        // be transformed, then make it known now.
+        //
+        DSNode *N = CalledDS.getValueMap()[I][0].Node;
+        if (ReachableNodes.count(N)) {
+#ifdef DEBUG_TRANSFORM_PROGRESS
+          cerr << "ensure dependant arguments adds for arg #" << i << "\n";
+#endif
+          addCallInfo(DS, Call, i, InverseNodeMap[N], PoolDescs);
+
+          // Keep sorted!
+          finalizeConstruction();
+        }
+      }
+    }
+}
+
  
  // transformFunctionBody - This transforms the instruction in 'F' to use the
-// pools specified in PoolDescriptors when modifying data structure nodes
-// specified in the PoolDescriptors map.  Specifically, scalar values specified
-// in the Scalars vector must be remapped.  IPFGraph is the closed data
-// structure graph for F, of which the PoolDescriptor nodes come from.
+// pools specified in PoolDescs when modifying data structure nodes specified in
+// the PoolDescs map.  Specifically, scalar values specified in the Scalars
+// vector must be remapped.  IPFGraph is the closed data structure graph for F,
+// of which the PoolDescriptor nodes come from.
  //
  void PoolAllocate::transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
-                                       map<DSNode*, Value*> &PoolDescriptors) {
+                                         map<DSNode*, PoolInfo> &PoolDescs) {
  
    // Loop through the value map looking for scalars that refer to nonescaping
    // allocations.  Add them to the Scalars vector.  Note that we may have
@@ -434,33 +1093,39 @@ void PoolAllocate::transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
    map<Value*, PointerValSet> &ValMap = IPFGraph.getValueMap();
    vector<ScalarInfo> Scalars;
  
-  cerr << "Building scalar map:\n";
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "Building scalar map for fn '" << F->getName() << "' body:\n";
+#endif
  
    for (map<Value*, PointerValSet>::iterator I = ValMap.begin(),
           E = ValMap.end(); I != E; ++I) {
      const PointerValSet &PVS = I->second;  // Set of things pointed to by scalar
  
-    cerr << "Scalar Mapping from:"; I->first->dump();
-    cerr << "\nScalar Mapping to: "; PVS.print(cerr);
-
      // Check to see if the scalar points to a data structure node...
      for (unsigned i = 0, e = PVS.size(); i != e; ++i) {
+      if (PVS[i].Index) { cerr << "Problem in " << F->getName() << " for " << I->first << "\n"; }
        assert(PVS[i].Index == 0 && "Nonzero not handled yet!");
          
        // If the allocation is in the nonescaping set...
-      map<DSNode*, Value*>::iterator AI = PoolDescriptors.find(PVS[i].Node);
-      if (AI != PoolDescriptors.end()) // Add it to the list of scalars
-        Scalars.push_back(ScalarInfo(I->first, PVS[i].Node, AI->second));
+      map<DSNode*, PoolInfo>::iterator AI = PoolDescs.find(PVS[i].Node);
+      if (AI != PoolDescs.end()) {              // Add it to the list of scalars
+        Scalars.push_back(ScalarInfo(I->first, AI->second));
+#ifdef DEBUG_TRANSFORM_PROGRESS
+        cerr << "\nScalar Mapping from:" << I->first
+             << "Scalar Mapping to: "; PVS.print(cerr);
+#endif
+      }
      }
    }
  
-
-
+#ifdef DEBUG_TRANSFORM_PROGRESS
    cerr << "\nIn '" << F->getName()
         << "': Found the following values that point to poolable nodes:\n";
  
    for (unsigned i = 0, e = Scalars.size(); i != e; ++i)
-    Scalars[i].Val->dump();
+    cerr << Scalars[i].Val;
+  cerr << "\n";
+#endif
  
    // CallMap - Contain an entry for every call instruction that needs to be
    // transformed.  Each entry in the map contains information about what we need
@@ -468,7 +1133,7 @@ void PoolAllocate::transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
    //
    map<CallInst*, TransformFunctionInfo> CallMap;
  
-  // Now we need to figure out what called methods we need to transform, and
+  // Now we need to figure out what called functions we need to transform, and
    // how.  To do this, we look at all of the scalars, seeing which functions are
    // either used as a scalar value (so they return a data structure), or are
    // passed one of our scalar values.
@@ -479,7 +1144,7 @@ void PoolAllocate::transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
      // Check to see if the scalar _IS_ a call...
      if (CallInst *CI = dyn_cast<CallInst>(ScalarVal))
        // If so, add information about the pool it will be returning...
-      addCallInfo(DS, CallMap[CI], CI, -1, Scalars[i].Node, PoolDescriptors);
+      CallMap[CI].addCallInfo(DS, CI, -1, Scalars[i].Pool.Node, PoolDescs);
  
      // Check to see if the scalar is an operand to a call...
      for (Value::use_iterator UI = ScalarVal->use_begin(),
@@ -494,23 +1159,32 @@ void PoolAllocate::transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
          // than once!  It will get multiple entries for the first pointer.
  
          // Add the operand number and pool handle to the call table...
-        addCallInfo(DS, CallMap[CI], CI, OI-CI->op_begin()-1, Scalars[i].Node,
-                    PoolDescriptors);
+        CallMap[CI].addCallInfo(DS, CI, OI-CI->op_begin()-1,
+                                Scalars[i].Pool.Node, PoolDescs);
        }
      }
    }
  
+  // Make sure that all dependant arguments are added as well.  For example, if
+  // we call foo(null, P) and foo treats it's first and second arguments as
+  // belonging to the same data structure, the we MUST set up the CallMap to
+  // know that the null needs to be transformed into an index as well.
+  //
+  for (map<CallInst*, TransformFunctionInfo>::iterator I = CallMap.begin();
+       I != CallMap.end(); ++I)
+    I->second.ensureDependantArgumentsIncluded(DS, PoolDescs);
+
+#ifdef DEBUG_TRANSFORM_PROGRESS
    // Print out call map...
    for (map<CallInst*, TransformFunctionInfo>::iterator I = CallMap.begin();
         I != CallMap.end(); ++I) {
-    cerr << "\nFor call: ";
-    I->first->dump();
-    I->second.finalizeConstruction();
+    cerr << "For call: " << I->first;
      cerr << I->second.Func->getName() << " must pass pool pointer for args #";
      for (unsigned i = 0; i < I->second.ArgInfo.size(); ++i)
        cerr << I->second.ArgInfo[i].ArgNo << ", ";
-    cerr << "\n";
+    cerr << "\n\n";
    }
+#endif
  
    // Loop through all of the call nodes, recursively creating the new functions
    // that we want to call...  This uses a map to prevent infinite recursion and
@@ -518,16 +1192,13 @@ void PoolAllocate::transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
    //
    for (map<CallInst*, TransformFunctionInfo>::iterator I = CallMap.begin(),
           E = CallMap.end(); I != E; ++I) {
-    // Make sure the entries are sorted.
-    I->second.finalizeConstruction();
-
      // Transform all of the functions we need, or at least ensure there is a
      // cached version available.
-    transformFunction(I->second, IPFGraph);
+    transformFunction(I->second, IPFGraph, PoolDescs);
    }
  
    // Now that all of the functions that we want to call are available, transform
-  // the local method so that it uses the pools locally and passes them to the
+  // the local function so that it uses the pools locally and passes them to the
    // functions that we just hacked up.
    //
  
@@ -543,105 +1214,118 @@ void PoolAllocate::transformFunctionBody(Function *F, FunctionDSGraph &IPFGraph,
      // All all of the instructions that use the scalar as an operand...
      for (Value::use_iterator UI = ScalarVal->use_begin(),
             UE = ScalarVal->use_end(); UI != UE; ++UI)
-      InstToFix.push_back(dyn_cast<Instruction>(*UI));
+      InstToFix.push_back(cast<Instruction>(*UI));
    }
  
+  // Make sure that we get return instructions that return a null value from the
+  // function...
+  //
+  if (!IPFGraph.getRetNodes().empty()) {
+    assert(IPFGraph.getRetNodes().size() == 1 && "Can only return one node?");
+    PointerVal RetNode = IPFGraph.getRetNodes()[0];
+    assert(RetNode.Index == 0 && "Subindexing not implemented yet!");
+
+    // Only process return instructions if the return value of this function is
+    // part of one of the data structures we are transforming...
+    //
+    if (PoolDescs.count(RetNode.Node)) {
+      // Loop over all of the basic blocks, adding return instructions...
+      for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
+        if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+          InstToFix.push_back(RI);
+    }
+  }
+
+
+
    // Eliminate duplicates by sorting, then removing equal neighbors.
    sort(InstToFix.begin(), InstToFix.end());
    InstToFix.erase(unique(InstToFix.begin(), InstToFix.end()), InstToFix.end());
  
-  // Use a FunctionBodyTransformer to transform all of the involved instructions
-  FunctionBodyTransformer FBT(*this, Scalars, CallMap);
-  for (unsigned i = 0, e = InstToFix.size(); i != e; ++i)
-    FBT.visit(InstToFix[i]);
+  // Loop over all of the instructions to transform, creating the new
+  // replacement instructions for them.  This also unlinks them from the
+  // function so they can be safely deleted later.
+  //
+  map<Value*, Value*> XFormMap;  
+  NewInstructionCreator NIC(*this, Scalars, CallMap, XFormMap);
  
+  // Visit all instructions... creating the new instructions that we need and
+  // unlinking the old instructions from the function...
+  //
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  for (unsigned i = 0, e = InstToFix.size(); i != e; ++i) {
+    cerr << "Fixing: " << InstToFix[i];
+    NIC.visit(*InstToFix[i]);
+  }
+#else
+  NIC.visit(InstToFix.begin(), InstToFix.end());
+#endif
  
-  // Since we have liberally hacked the function to pieces, we want to inform
-  // the datastructure pass that its internal representation is out of date.
+  // Make all instructions we will delete "let go" of their operands... so that
+  // we can safely delete Arguments whose types have changed...
    //
-  DS->invalidateFunction(F);
-}
+  for_each(InstToFix.begin(), InstToFix.end(),
+           std::mem_fun(&Instruction::dropAllReferences));
  
-static void addNodeMapping(DSNode *SrcNode, const PointerValSet &PVS,
-                           map<DSNode*, PointerValSet> &NodeMapping) {
-  for (unsigned i = 0, e = PVS.size(); i != e; ++i)
-    if (NodeMapping[SrcNode].add(PVS[i])) {  // Not in map yet?
-      assert(PVS[i].Index == 0 && "Node indexing not supported yet!");
-      DSNode *DestNode = PVS[i].Node;
+  // Loop through all of the pointer arguments coming into the function,
+  // replacing them with arguments of POINTERTYPE to match the function type of
+  // the function.
+  //
+  FunctionType::ParamTypes::const_iterator TI =
+    F->getFunctionType()->getParamTypes().begin();
+  for (Function::aiterator I = F->abegin(), E = F->aend(); I != E; ++I, ++TI) {
+    if (I->getType() != *TI) {
+      assert(isa<PointerType>(I->getType()) && *TI == POINTERTYPE);
+      Argument *NewArg = new Argument(*TI, I->getName());
+      XFormMap[I] = NewArg;  // Map old arg into new arg...
+
+      // Replace the old argument and then delete it...
+      I = F->getArgumentList().erase(I);
+      I = F->getArgumentList().insert(I, NewArg);
+    }
+  }
  
-      // Loop over all of the outgoing links in the mapped graph
-      for (unsigned l = 0, le = DestNode->getNumOutgoingLinks(); l != le; ++l) {
-        PointerValSet &SrcSet = SrcNode->getOutgoingLink(l);
-        const PointerValSet &DestSet = DestNode->getOutgoingLink(l);
+  // Now that all of the new instructions have been created, we can update all
+  // of the references to dummy values to be references to the actual values
+  // that are computed.
+  //
+  NIC.updateReferences();
  
-        // Add all of the node mappings now!
-        for (unsigned si = 0, se = SrcSet.size(); si != se; ++si) {
-          assert(SrcSet[si].Index == 0 && "Can't handle node offset!");
-          addNodeMapping(SrcSet[si].Node, DestSet, NodeMapping);
-        }
-      }
-    }
-}
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "TRANSFORMED FUNCTION:\n" << F;
+#endif
  
-// CalculateNodeMapping - There is a partial isomorphism between the graph
-// passed in and the graph that is actually used by the function.  We need to
-// figure out what this mapping is so that we can transformFunctionBody the
-// instructions in the function itself.  Note that every node in the graph that
-// we are interested in must be both in the local graph of the called function,
-// and in the local graph of the calling function.  Because of this, we only
-// define the mapping for these nodes [conveniently these are the only nodes we
-// CAN define a mapping for...]
-//
-// The roots of the graph that we are transforming is rooted in the arguments
-// passed into the function from the caller.  This is where we start our
-// mapping calculation.
-//
-// The NodeMapping calculated maps from the callers graph to the called graph.
-//
-static void CalculateNodeMapping(Function *F, TransformFunctionInfo &TFI,
-                                 FunctionDSGraph &CallerGraph,
-                                 FunctionDSGraph &CalledGraph, 
-                                 map<DSNode*, PointerValSet> &NodeMapping) {
-  int LastArgNo = -2;
-  for (unsigned i = 0, e = TFI.ArgInfo.size(); i != e; ++i) {
-    // Figure out what nodes in the called graph the TFI.ArgInfo[i].Node node
-    // corresponds to...
-    //
-    // Only consider first node of sequence.  Extra nodes may may be added
-    // to the TFI if the data structure requires more nodes than just the
-    // one the argument points to.  We are only interested in the one the
-    // argument points to though.
-    //
-    if (TFI.ArgInfo[i].ArgNo != LastArgNo) {
-      if (TFI.ArgInfo[i].ArgNo == -1) {
-        addNodeMapping(TFI.ArgInfo[i].Node, CalledGraph.getRetNodes(),
-                       NodeMapping);
-      } else {
-        // Figure out which node argument # ArgNo points to in the called graph.
-        Value *Arg = F->getArgumentList()[TFI.ArgInfo[i].ArgNo];     
-        addNodeMapping(TFI.ArgInfo[i].Node, CalledGraph.getValueMap()[Arg],
-                       NodeMapping);
-      }
-      LastArgNo = TFI.ArgInfo[i].ArgNo;
-    }
-  }
+  // Delete all of the "instructions to fix"
+  for_each(InstToFix.begin(), InstToFix.end(), deleter<Instruction>);
+
+  // Eliminate pool base loads that we can easily prove are redundant
+  if (!DisableRLE)
+    PoolBaseLoadEliminator(PoolDescs).visit(F);
+
+  // Since we have liberally hacked the function to pieces, we want to inform
+  // the datastructure pass that its internal representation is out of date.
+  //
+  DS->invalidateFunction(F);
  }
  
  
+
  // transformFunction - Transform the specified function the specified way.  It
  // we have already transformed that function that way, don't do anything.  The
  // nodes in the TransformFunctionInfo come out of callers data structure graph.
  //
  void PoolAllocate::transformFunction(TransformFunctionInfo &TFI,
-                                     FunctionDSGraph &CallerIPGraph) {
+                                     FunctionDSGraph &CallerIPGraph,
+                                     map<DSNode*, PoolInfo> &CallerPoolDesc) {
    if (getTransformedFunction(TFI)) return;  // Function xformation already done?
  
-  cerr << "**********\nEntering transformFunction for "
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "********** Entering transformFunction for "
         << TFI.Func->getName() << ":\n";
    for (unsigned i = 0, e = TFI.ArgInfo.size(); i != e; ++i)
      cerr << "  ArgInfo[" << i << "] = " << TFI.ArgInfo[i].ArgNo << "\n";
    cerr << "\n";
-
+#endif
  
    const FunctionType *OldFuncType = TFI.Func->getFunctionType();
  
@@ -649,25 +1333,40 @@ void PoolAllocate::transformFunction(TransformFunctionInfo &TFI,
  
    // Build the type for the new function that we are transforming
    vector<const Type*> ArgTys;
+  ArgTys.reserve(OldFuncType->getNumParams()+TFI.ArgInfo.size());
    for (unsigned i = 0, e = OldFuncType->getNumParams(); i != e; ++i)
      ArgTys.push_back(OldFuncType->getParamType(i));
  
+  const Type *RetType = OldFuncType->getReturnType();
+  
    // Add one pool pointer for every argument that needs to be supplemented.
-  ArgTys.insert(ArgTys.end(), TFI.ArgInfo.size(), PoolTy);
+  for (unsigned i = 0, e = TFI.ArgInfo.size(); i != e; ++i) {
+    if (TFI.ArgInfo[i].ArgNo == -1)
+      RetType = POINTERTYPE;  // Return a pointer
+    else
+      ArgTys[TFI.ArgInfo[i].ArgNo] = POINTERTYPE; // Pass a pointer
+    ArgTys.push_back(PointerType::get(CallerPoolDesc.find(TFI.ArgInfo[i].Node)
+                                        ->second.PoolType));
+  }
  
    // Build the new function type...
-  const // FIXME when types are not const
-  FunctionType *NewFuncType = FunctionType::get(OldFuncType->getReturnType(),
-                                                ArgTys,OldFuncType->isVarArg());
+  const FunctionType *NewFuncType = FunctionType::get(RetType, ArgTys,
+                                                      OldFuncType->isVarArg());
  
    // The new function is internal, because we know that only we can call it.
    // This also helps subsequent IP transformations to eliminate duplicated pool
-  // pointers. [in the future when they are implemented].
+  // pointers (which look like the same value is always passed into a parameter,
+  // allowing it to be easily eliminated).
    //
    Function *NewFunc = new Function(NewFuncType, true,
                                     TFI.Func->getName()+".poolxform");
    CurModule->getFunctionList().push_back(NewFunc);
  
+
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "Created function prototype: " << NewFunc << "\n";
+#endif
+
    // Add the newly formed function to the TransformedFunctions table so that
    // infinite recursion does not occur!
    //
@@ -675,21 +1374,23 @@ void PoolAllocate::transformFunction(TransformFunctionInfo &TFI,
  
    // Add arguments to the function... starting with all of the old arguments
    vector<Value*> ArgMap;
-  for (unsigned i = 0, e = TFI.Func->getArgumentList().size(); i != e; ++i) {
-    const FunctionArgument *OFA = TFI.Func->getArgumentList()[i];
-    FunctionArgument *NFA = new FunctionArgument(OFA->getType(),OFA->getName());
+  for (Function::const_aiterator I = TFI.Func->abegin(), E = TFI.Func->aend();
+       I != E; ++I) {
+    Argument *NFA = new Argument(I->getType(), I->getName());
      NewFunc->getArgumentList().push_back(NFA);
      ArgMap.push_back(NFA);  // Keep track of the arguments 
    }
  
    // Now add all of the arguments corresponding to pools passed in...
    for (unsigned i = 0, e = TFI.ArgInfo.size(); i != e; ++i) {
+    CallArgInfo &AI = TFI.ArgInfo[i];
      string Name;
-    if (TFI.ArgInfo[i].ArgNo == -1)
-      Name = "retpool";
+    if (AI.ArgNo == -1)
+      Name = "ret";
      else
-      Name = ArgMap[TFI.ArgInfo[i].ArgNo]->getName();  // Get the arg name
-    FunctionArgument *NFA = new FunctionArgument(PoolTy, Name+".pool");
+      Name = ArgMap[AI.ArgNo]->getName();  // Get the arg name
+    const Type *Ty = PointerType::get(CallerPoolDesc[AI.Node].PoolType);
+    Argument *NFA = new Argument(Ty, Name+".pool");
      NewFunc->getArgumentList().push_back(NFA);
    }
  
@@ -703,7 +1404,13 @@ void PoolAllocate::transformFunction(TransformFunctionInfo &TFI,
    //
    FunctionDSGraph &DSGraph = DS->getClosedDSGraph(NewFunc);  
  
-  // NodeMapping - Multimap from callers graph to called graph.
+  // NodeMapping - Multimap from callers graph to called graph.  We are
+  // guaranteed that the called function graph has more nodes than the caller,
+  // or exactly the same number of nodes.  This is because the called function
+  // might not know that two nodes are merged when considering the callers
+  // context, but the caller obviously does.  Because of this, a single node in
+  // the calling function's data structure graph can map to multiple nodes in
+  // the called functions graph.
    //
    map<DSNode*, PointerValSet> NodeMapping;
  
@@ -711,6 +1418,7 @@ void PoolAllocate::transformFunction(TransformFunctionInfo &TFI,
                         NodeMapping);
  
    // Print out the node mapping...
+#ifdef DEBUG_TRANSFORM_PROGRESS
    cerr << "\nNode mapping for call of " << NewFunc->getName() << "\n";
    for (map<DSNode*, PointerValSet>::iterator I = NodeMapping.begin();
         I != NodeMapping.end(); ++I) {
@@ -718,47 +1426,68 @@ void PoolAllocate::transformFunction(TransformFunctionInfo &TFI,
      cerr << "To:  "; I->second.print(cerr);
      cerr << "\n";
    }
+#endif
  
    // Fill in the PoolDescriptor information for the transformed function so that
    // it can determine which value holds the pool descriptor for each data
    // structure node that it accesses.
    //
-  map<DSNode*, Value*> PoolDescriptors;
+  map<DSNode*, PoolInfo> PoolDescs;
  
+#ifdef DEBUG_TRANSFORM_PROGRESS
    cerr << "\nCalculating the pool descriptor map:\n";
+#endif
  
-  // All of the pool descriptors must be passed in as arguments...
-  for (unsigned i = 0, e = TFI.ArgInfo.size(); i != e; ++i) {
-    DSNode *CallerNode = TFI.ArgInfo[i].Node;
-    Value  *CallerPool = TFI.ArgInfo[i].PoolHandle;
-
-    cerr << "Mapped caller node: "; CallerNode->print(cerr);
-    cerr << "Mapped caller pool: "; CallerPool->dump();
-
-    // Calculate the argument number that the pool is to the function call...
-    // The call instruction should not have the pool operands added yet.
-    unsigned ArgNo = TFI.Call->getNumOperands()-1+i;
-    cerr << "Should be argument #: " << ArgNo << "[i = " << i << "]\n";
-    assert(ArgNo < NewFunc->getArgumentList().size() &&
-           "Call already has pool arguments added??");
-
-    // Map the pool argument into the called function...
-    Value *CalleePool = NewFunc->getArgumentList()[ArgNo];
-
-    // Map the DSNode into the callee's DSGraph
-    const PointerValSet &CalleeNodes = NodeMapping[CallerNode];
-    for (unsigned n = 0, ne = CalleeNodes.size(); n != ne; ++n) {
-      assert(CalleeNodes[n].Index == 0 && "Indexed node not handled yet!");
-      DSNode *CalleeNode = CalleeNodes[n].Node;
-
-      cerr << "*** to callee node: "; CalleeNode->print(cerr);
-      cerr << "*** to callee pool: "; CalleePool->dump();
-      cerr << "\n";
-      
-      assert(CalleeNode && CalleePool && "Invalid nodes!");
-      Value *&PV = PoolDescriptors[CalleeNode];
-      //assert((PV == 0 || PV == CalleePool) && "Invalid node remapping!");
-      PV = CalleePool;         // Update the pool descriptor map!
+  // Calculate as much of the pool descriptor map as possible.  Since we have
+  // the node mapping between the caller and callee functions, and we have the
+  // pool descriptor information of the caller, we can calculate a partical pool
+  // descriptor map for the called function.
+  //
+  // The nodes that we do not have complete information for are the ones that
+  // are accessed by loading pointers derived from arguments passed in, but that
+  // are not passed in directly.  In this case, we have all of the information
+  // except a pool value.  If the called function refers to this pool, the pool
+  // value will be loaded from the pool graph and added to the map as neccesary.
+  //
+  for (map<DSNode*, PointerValSet>::iterator I = NodeMapping.begin();
+       I != NodeMapping.end(); ++I) {
+    DSNode *CallerNode = I->first;
+    PoolInfo &CallerPI = CallerPoolDesc[CallerNode];
+
+    // Check to see if we have a node pointer passed in for this value...
+    Value *CalleeValue = 0;
+    for (unsigned a = 0, ae = TFI.ArgInfo.size(); a != ae; ++a)
+      if (TFI.ArgInfo[a].Node == CallerNode) {
+        // Calculate the argument number that the pool is to the function
+        // call...  The call instruction should not have the pool operands added
+        // yet.
+        unsigned ArgNo = TFI.Call->getNumOperands()-1+a;
+#ifdef DEBUG_TRANSFORM_PROGRESS
+        cerr << "Should be argument #: " << ArgNo << "[i = " << a << "]\n";
+#endif
+        assert(ArgNo < NewFunc->asize() &&
+               "Call already has pool arguments added??");
+
+        // Map the pool argument into the called function...
+        Function::aiterator AI = NewFunc->abegin();
+        std::advance(AI, ArgNo);
+        CalleeValue = AI;
+        break;  // Found value, quit loop
+      }
+
+    // Loop over all of the data structure nodes that this incoming node maps to
+    // Creating a PoolInfo structure for them.
+    for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+      assert(I->second[i].Index == 0 && "Doesn't handle subindexing yet!");
+      DSNode *CalleeNode = I->second[i].Node;
+     
+      // Add the descriptor.  We already know everything about it by now, much
+      // of it is the same as the caller info.
+      // 
+      PoolDescs.insert(std::make_pair(CalleeNode,
+                                 PoolInfo(CalleeNode, CalleeValue,
+                                          CallerPI.NewType,
+                                          CallerPI.PoolType)));
      }
    }
  
@@ -772,111 +1501,242 @@ void PoolAllocate::transformFunction(TransformFunctionInfo &TFI,
    // Now that we know everything we need about the function, transform the body
    // now!
    //
-  transformFunctionBody(NewFunc, DSGraph, PoolDescriptors);
-
-  cerr << "Function after transformation:\n";
-  NewFunc->dump();
+  transformFunctionBody(NewFunc, DSGraph, PoolDescs);
+  
+#ifdef DEBUG_TRANSFORM_PROGRESS
+  cerr << "Function after transformation:\n" << NewFunc;
+#endif
  }
  
+static unsigned countPointerTypes(const Type *Ty) {
+  if (isa<PointerType>(Ty)) {
+    return 1;
+  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    unsigned Num = 0;
+    for (unsigned i = 0, e = STy->getElementTypes().size(); i != e; ++i)
+      Num += countPointerTypes(STy->getElementTypes()[i]);
+    return Num;
+  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    return countPointerTypes(ATy->getElementType());
+  } else {
+    assert(Ty->isPrimitiveType() && "Unknown derived type!");
+    return 0;
+  }
+}
  
  // CreatePools - Insert instructions into the function we are processing to
  // create all of the memory pool objects themselves.  This also inserts
  // destruction code.  Add an alloca for each pool that is allocated to the
-// PoolDescriptors vector.
+// PoolDescs vector.
  //
  void PoolAllocate::CreatePools(Function *F, const vector<AllocDSNode*> &Allocs,
-                               map<DSNode*, Value*> &PoolDescriptors) {
-  // FIXME: This should use an IP version of the UnifyAllExits pass!
+                               map<DSNode*, PoolInfo> &PoolDescs) {
+  // Find all of the return nodes in the function...
    vector<BasicBlock*> ReturnNodes;
    for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)
-    if (isa<ReturnInst>((*I)->getTerminator()))
-      ReturnNodes.push_back(*I);
+    if (isa<ReturnInst>(I->getTerminator()))
+      ReturnNodes.push_back(I);
+
+#ifdef DEBUG_CREATE_POOLS
+  cerr << "Allocs that we are pool allocating:\n";
+  for (unsigned i = 0, e = Allocs.size(); i != e; ++i)
+    Allocs[i]->dump();
+#endif
+
+  map<DSNode*, PATypeHolder> AbsPoolTyMap;
+
+  // First pass over the allocations to process...
+  for (unsigned i = 0, e = Allocs.size(); i != e; ++i) {
+    // Create the pooldescriptor mapping... with null entries for everything
+    // except the node & NewType fields.
+    //
+    map<DSNode*, PoolInfo>::iterator PI =
+      PoolDescs.insert(std::make_pair(Allocs[i], PoolInfo(Allocs[i]))).first;
+
+    // Add a symbol table entry for the new type if there was one for the old
+    // type...
+    string OldName = CurModule->getTypeName(Allocs[i]->getType());
+    if (OldName.empty()) OldName = "node";
+    CurModule->addTypeName(OldName+".p", PI->second.NewType);
+
+    // Create the abstract pool types that will need to be resolved in a second
+    // pass once an abstract type is created for each pool.
+    //
+    // Can only handle limited shapes for now...
+    const Type *OldNodeTy = Allocs[i]->getType();
+    vector<const Type*> PoolTypes;
+
+    // Pool type is the first element of the pool descriptor type...
+    PoolTypes.push_back(getPoolType(PoolDescs[Allocs[i]].NewType));
+
+    unsigned NumPointers = countPointerTypes(OldNodeTy);
+    while (NumPointers--)   // Add a different opaque type for each pointer
+      PoolTypes.push_back(OpaqueType::get());
+
+    assert(Allocs[i]->getNumLinks() == PoolTypes.size()-1 &&
+           "Node should have same number of pointers as pool!");
+
+    StructType *PoolType = StructType::get(PoolTypes);
+
+    // Add a symbol table entry for the pooltype if possible...
+    CurModule->addTypeName(OldName+".pool", PoolType);
+
+    // Create the pool type, with opaque values for pointers...
+    AbsPoolTyMap.insert(std::make_pair(Allocs[i], PoolType));
+#ifdef DEBUG_CREATE_POOLS
+    cerr << "POOL TY: " << AbsPoolTyMap.find(Allocs[i])->second.get() << "\n";
+#endif
+  }
    
+  // Now that we have types for all of the pool types, link them all together.
+  for (unsigned i = 0, e = Allocs.size(); i != e; ++i) {
+    PATypeHolder &PoolTyH = AbsPoolTyMap.find(Allocs[i])->second;
+
+    // Resolve all of the outgoing pointer types of this pool node...
+    for (unsigned p = 0, pe = Allocs[i]->getNumLinks(); p != pe; ++p) {
+      PointerValSet &PVS = Allocs[i]->getLink(p);
+      assert(!PVS.empty() && "Outgoing edge is empty, field unused, can"
+             " probably just leave the type opaque or something dumb.");
+      unsigned Out;
+      for (Out = 0; AbsPoolTyMap.count(PVS[Out].Node) == 0; ++Out)
+        assert(Out != PVS.size() && "No edge to an outgoing allocation node!?");
+      
+      assert(PVS[Out].Index == 0 && "Subindexing not implemented yet!");
  
-  // Create the code that goes in the entry and exit nodes for the method...
+      // The actual struct type could change each time through the loop, so it's
+      // NOT loop invariant.
+      const StructType *PoolTy = cast<StructType>(PoolTyH.get());
+
+      // Get the opaque type...
+      DerivedType *ElTy = (DerivedType*)(PoolTy->getElementTypes()[p+1].get());
+
+#ifdef DEBUG_CREATE_POOLS
+      cerr << "Refining " << ElTy << " of " << PoolTy << " to "
+           << AbsPoolTyMap.find(PVS[Out].Node)->second.get() << "\n";
+#endif
+
+      const Type *RefPoolTy = AbsPoolTyMap.find(PVS[Out].Node)->second.get();
+      ElTy->refineAbstractTypeTo(PointerType::get(RefPoolTy));
+
+#ifdef DEBUG_CREATE_POOLS
+      cerr << "Result pool type is: " << PoolTyH.get() << "\n";
+#endif
+    }
+  }
+
+  // Create the code that goes in the entry and exit nodes for the function...
    vector<Instruction*> EntryNodeInsts;
    for (unsigned i = 0, e = Allocs.size(); i != e; ++i) {
+    PoolInfo &PI = PoolDescs[Allocs[i]];
+    
+    // Fill in the pool type for this pool...
+    PI.PoolType = AbsPoolTyMap.find(Allocs[i])->second.get();
+    assert(!PI.PoolType->isAbstract() &&
+           "Pool type should not be abstract anymore!");
+
      // Add an allocation and a free for each pool...
-    AllocaInst *PoolAlloc = new AllocaInst(PoolTy, 0, "pool");
+    AllocaInst *PoolAlloc = new AllocaInst(PI.PoolType, 0,
+                                           CurModule->getTypeName(PI.PoolType));
+    PI.Handle = PoolAlloc;
      EntryNodeInsts.push_back(PoolAlloc);
-    PoolDescriptors[Allocs[i]] = PoolAlloc;   // Keep track of pool allocas
      AllocationInst *AI = Allocs[i]->getAllocation();
  
      // Initialize the pool.  We need to know how big each allocation is.  For
      // our purposes here, we assume we are allocating a scalar, or array of
      // constant size.
      //
-    unsigned ElSize = TargetData.getTypeSize(AI->getAllocatedType());
-    ElSize *= cast<ConstantUInt>(AI->getArraySize())->getValue();
+    unsigned ElSize = TargetData.getTypeSize(PI.NewType);
  
      vector<Value*> Args;
-    Args.push_back(PoolAlloc);    // Pool to initialize
      Args.push_back(ConstantUInt::get(Type::UIntTy, ElSize));
+    Args.push_back(PoolAlloc);    // Pool to initialize
      EntryNodeInsts.push_back(new CallInst(PoolInit, Args));
  
-    // Destroy the pool...
-    Args.pop_back();
-
+    // Add code to destroy the pool in all of the exit nodes of the function...
+    Args.clear();
+    Args.push_back(PoolAlloc);    // Pool to initialize
+    
      for (unsigned EN = 0, ENE = ReturnNodes.size(); EN != ENE; ++EN) {
        Instruction *Destroy = new CallInst(PoolDestroy, Args);
  
        // Insert it before the return instruction...
        BasicBlock *RetNode = ReturnNodes[EN];
-      RetNode->getInstList().insert(RetNode->end()-1, Destroy);
+      RetNode->getInstList().insert(RetNode->end()--, Destroy);
      }
    }
  
+  // Now that all of the pool descriptors have been created, link them together
+  // so that called functions can get links as neccesary...
+  //
+  for (unsigned i = 0, e = Allocs.size(); i != e; ++i) {
+    PoolInfo &PI = PoolDescs[Allocs[i]];
+
+    // For every pointer in the data structure, initialize a link that
+    // indicates which pool to access...
+    //
+    vector<Value*> Indices(2);
+    Indices[0] = ConstantUInt::get(Type::UIntTy, 0);
+    for (unsigned l = 0, le = PI.Node->getNumLinks(); l != le; ++l)
+      // Only store an entry for the field if the field is used!
+      if (!PI.Node->getLink(l).empty()) {
+        assert(PI.Node->getLink(l).size() == 1 && "Should have only one link!");
+        PointerVal PV = PI.Node->getLink(l)[0];
+        assert(PV.Index == 0 && "Subindexing not supported yet!");
+        PoolInfo &LinkedPool = PoolDescs[PV.Node];
+        Indices[1] = ConstantUInt::get(Type::UByteTy, 1+l);
+      
+        EntryNodeInsts.push_back(new StoreInst(LinkedPool.Handle, PI.Handle,
+                                               Indices));
+      }
+  }
+
    // Insert the entry node code into the entry block...
-  F->getEntryNode()->getInstList().insert(F->getEntryNode()->begin()+1,
+  F->getEntryNode().getInstList().insert(++F->getEntryNode().begin(),
                                            EntryNodeInsts.begin(),
                                            EntryNodeInsts.end());
  }
  
  
-// addPoolPrototypes - Add prototypes for the pool methods to the specified
+// addPoolPrototypes - Add prototypes for the pool functions to the specified
  // module and update the Pool* instance variables to point to them.
  //
-void PoolAllocate::addPoolPrototypes(Module *M) {
-  // Get PoolInit function...
+void PoolAllocate::addPoolPrototypes(Module &M) {
+  // Get poolinit function...
    vector<const Type*> Args;
-  Args.push_back(PoolTy);           // Pool to initialize
    Args.push_back(Type::UIntTy);     // Num bytes per element
-  FunctionType *PoolInitTy = FunctionType::get(Type::VoidTy, Args, false);
-  PoolInit = M->getOrInsertFunction("poolinit", PoolInitTy);
+  FunctionType *PoolInitTy = FunctionType::get(Type::VoidTy, Args, true);
+  PoolInit = M.getOrInsertFunction("poolinit", PoolInitTy);
  
    // Get pooldestroy function...
    Args.pop_back();  // Only takes a pool...
-  FunctionType *PoolDestroyTy = FunctionType::get(Type::VoidTy, Args, false);
-  PoolDestroy = M->getOrInsertFunction("pooldestroy", PoolDestroyTy);
-
-  const Type *PtrVoid = PointerType::get(Type::SByteTy);
+  FunctionType *PoolDestroyTy = FunctionType::get(Type::VoidTy, Args, true);
+  PoolDestroy = M.getOrInsertFunction("pooldestroy", PoolDestroyTy);
  
    // Get the poolalloc function...
-  FunctionType *PoolAllocTy = FunctionType::get(PtrVoid, Args, false);
-  PoolAlloc = M->getOrInsertFunction("poolalloc", PoolAllocTy);
+  FunctionType *PoolAllocTy = FunctionType::get(POINTERTYPE, Args, true);
+  PoolAlloc = M.getOrInsertFunction("poolalloc", PoolAllocTy);
  
    // Get the poolfree function...
-  Args.push_back(PtrVoid);
-  FunctionType *PoolFreeTy = FunctionType::get(Type::VoidTy, Args, false);
-  PoolFree = M->getOrInsertFunction("poolfree", PoolFreeTy);
+  Args.push_back(POINTERTYPE);       // Pointer to free
+  FunctionType *PoolFreeTy = FunctionType::get(Type::VoidTy, Args, true);
+  PoolFree = M.getOrInsertFunction("poolfree", PoolFreeTy);
  
-  // Add the %PoolTy type to the symbol table of the module...
-  M->addTypeName("PoolTy", PoolTy->getElementType());
+  Args[0] = Type::UIntTy;            // Number of slots to allocate
+  FunctionType *PoolAllocArrayTy = FunctionType::get(POINTERTYPE, Args, true);
+  PoolAllocArray = M.getOrInsertFunction("poolallocarray", PoolAllocArrayTy);
  }
  
  
-bool PoolAllocate::run(Module *M) {
+bool PoolAllocate::run(Module &M) {
    addPoolPrototypes(M);
-  CurModule = M;
+  CurModule = &M;
    
    DS = &getAnalysis<DataStructure>();
    bool Changed = false;
  
-  // We cannot use an iterator here because it will get invalidated when we add
-  // functions to the module later...
-  for (unsigned i = 0; i != M->size(); ++i)
-    if (!M->getFunctionList()[i]->isExternal()) {
-      Changed |= processFunction(M->getFunctionList()[i]);
+  for (Module::iterator I = M.begin(); I != M.end(); ++I)
+    if (!I->isExternal()) {
+      Changed |= processFunction(I);
        if (Changed) {
          cerr << "Only processing one function\n";
          break;
@@ -888,8 +1748,12 @@ bool PoolAllocate::run(Module *M) {
    return false;
  }
  
-
  // createPoolAllocatePass - Global function to access the functionality of this
  // pass...
  //
-Pass *createPoolAllocatePass() { return new PoolAllocate(); }
+Pass *createPoolAllocatePass() { 
+  assert(0 && "Pool allocator disabled!");
+  return 0;
+  //return new PoolAllocate(); 
+}
+#endif