Print "circular" warning message only in debug mode.
[oota-llvm.git] / lib / Target / SparcV9 / SparcV9InstrSelection.cpp
index 5e6aafd9e28e5d78434c1727adceb6d90df3fc65..5195f4ac29faef839c3603f35fff817e89f62238 100644 (file)
@@ -1,20 +1,15 @@
-// $Id$
-//***************************************************************************
-// File:
-//     SparcInstrSelection.cpp
-// 
-// Purpose:
-//      BURS instruction selection for SPARC V9 architecture.      
-//     
-// History:
-//     7/02/01  -  Vikram Adve  -  Created
-//**************************************************************************/
+//===-- SparcInstrSelection.cpp -------------------------------------------===//
+//
+//  BURS instruction selection for SPARC V9 architecture.      
+//
+//===----------------------------------------------------------------------===//
 
 #include "SparcInternals.h"
 #include "SparcInstrSelectionSupport.h"
 #include "SparcRegClassInfo.h"
 #include "llvm/CodeGen/InstrSelectionSupport.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrAnnot.h"
 #include "llvm/CodeGen/InstrForest.h"
 #include "llvm/CodeGen/InstrSelection.h"
 #include "llvm/CodeGen/MachineCodeForMethod.h"
 #include "llvm/iTerminators.h"
 #include "llvm/iMemory.h"
 #include "llvm/iOther.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/Function.h"
-#include "llvm/ConstantVals.h"
+#include "llvm/Constants.h"
+#include "llvm/ConstantHandling.h"
 #include "Support/MathExtras.h"
 #include <math.h>
 using std::vector;
 
-//************************* Forward Declarations ***************************/
-
-
-static void SetMemOperands_Internal     (vector<MachineInstr*>& mvec,
-                                         vector<MachineInstr*>::iterator mvecI,
-                                         const InstructionNode* vmInstrNode,
-                                         Value* ptrVal,
-                                         std::vector<Value*>& idxVec,
-                                         const TargetMachine& target);
-
-
 //************************ Internal Functions ******************************/
 
 
@@ -146,7 +130,7 @@ ChooseBFpccInstruction(const InstructionNode* instrNode,
 static TmpInstruction*
 GetTmpForCC(Value* boolVal, const Function *F, const Type* ccType)
 {
-  typedef std::hash_map<const Value*, TmpInstruction*> BoolTmpCache;
+  typedef hash_map<const Value*, TmpInstruction*> BoolTmpCache;
   static BoolTmpCache boolToTmpCache;     // Map boolVal -> TmpInstruction*
   static const Function *lastFunction = 0;// Use to flush cache between funcs
   
@@ -173,12 +157,13 @@ ChooseBccInstruction(const InstructionNode* instrNode,
                      bool& isFPBranch)
 {
   InstructionNode* setCCNode = (InstructionNode*) instrNode->leftChild();
-  BinaryOperator* setCCInstr = (BinaryOperator*) setCCNode->getInstruction();
+  assert(setCCNode->getOpLabel() == SetCCOp);
+  BinaryOperator* setCCInstr =cast<BinaryOperator>(setCCNode->getInstruction());
   const Type* setCCType = setCCInstr->getOperand(0)->getType();
   
-  isFPBranch = (setCCType == Type::FloatTy || setCCType == Type::DoubleTy); 
+  isFPBranch = setCCType->isFloatingPoint(); // Return value: don't delete!
   
-  if (isFPBranch) 
+  if (isFPBranch)
     return ChooseBFpccInstruction(instrNode, setCCInstr);
   else
     return ChooseBpccInstruction(instrNode, setCCInstr);
@@ -284,42 +269,38 @@ ChooseConvertToFloatInstr(OpLabel vopCode, const Type* opType)
 }
 
 static inline MachineOpCode 
-ChooseConvertToIntInstr(OpLabel vopCode, const Type* opType)
+ChooseConvertFPToIntInstr(Type::PrimitiveID tid, const Type* opType)
 {
   MachineOpCode opCode = INVALID_OPCODE;;
-  
-  if (vopCode == ToSByteTy || vopCode == ToShortTy || vopCode == ToIntTy)
+
+  assert((opType == Type::FloatTy || opType == Type::DoubleTy)
+         && "This function should only be called for FLOAT or DOUBLE");
+
+  if (tid==Type::UIntTyID)
     {
-      switch (opType->getPrimitiveID())
-        {
-        case Type::FloatTyID:   opCode = FSTOI; break;
-        case Type::DoubleTyID:  opCode = FDTOI; break;
-        default:
-          assert(0 && "Non-numeric non-bool type cannot be converted to Int");
-          break;
-        }
+      assert(tid != Type::UIntTyID && "FP-to-uint conversions must be expanded"
+             " into FP->long->uint for SPARC v9:  SO RUN PRESELECTION PASS!");
     }
-  else if (vopCode == ToLongTy)
+  else if (tid==Type::SByteTyID || tid==Type::ShortTyID || tid==Type::IntTyID ||
+           tid==Type::UByteTyID || tid==Type::UShortTyID)
     {
-      switch (opType->getPrimitiveID())
-        {
-        case Type::FloatTyID:   opCode = FSTOX; break;
-        case Type::DoubleTyID:  opCode = FDTOX; break;
-        default:
-          assert(0 && "Non-numeric non-bool type cannot be converted to Long");
-          break;
-        }
+      opCode = (opType == Type::FloatTy)? FSTOI : FDTOI;
+    }
+  else if (tid==Type::LongTyID || tid==Type::ULongTyID)
+    {
+      opCode = (opType == Type::FloatTy)? FSTOX : FDTOX;
     }
   else
       assert(0 && "Should not get here, Mo!");
-  
+
   return opCode;
 }
 
 MachineInstr*
-CreateConvertToIntInstr(OpLabel vopCode, Value* srcVal, Value* destVal)
+CreateConvertFPToIntInstr(Type::PrimitiveID destTID,
+                          Value* srcVal, Value* destVal)
 {
-  MachineOpCode opCode = ChooseConvertToIntInstr(vopCode, srcVal->getType());
+  MachineOpCode opCode = ChooseConvertFPToIntInstr(destTID, srcVal->getType());
   assert(opCode != INVALID_OPCODE && "Expected to need conversion!");
   
   MachineInstr* M = new MachineInstr(opCode);
@@ -328,28 +309,49 @@ CreateConvertToIntInstr(OpLabel vopCode, Value* srcVal, Value* destVal)
   return M;
 }
 
-static inline MachineOpCode 
-ChooseAddInstructionByType(const Type* resultType)
+// CreateCodeToConvertFloatToInt: Convert FP value to signed or unsigned integer
+// The FP value must be converted to the dest type in an FP register,
+// and the result is then copied from FP to int register via memory.
+//
+// Since fdtoi converts to signed integers, any FP value V between MAXINT+1
+// and MAXUNSIGNED (i.e., 2^31 <= V <= 2^32-1) would be converted incorrectly
+// *only* when converting to an unsigned int.  (Unsigned byte, short or long
+// don't have this problem.)
+// For unsigned int, we therefore have to generate the code sequence:
+// 
+//      unsigned result;
+//      if (V > (float) MAXINT) {
+//        result = (unsigned) (V - (float) MAXINT) + (unsigned) MAXINT;
+//      }
+//      else
+//        result = (unsigned int) V;
+// 
+static void
+CreateCodeToConvertFloatToInt(const TargetMachine& target,
+                              Value* opVal,
+                              Instruction* destI,
+                              std::vector<MachineInstr*>& mvec,
+                              MachineCodeForInstruction& mcfi)
 {
-  MachineOpCode opCode = INVALID_OPCODE;
-  
-  if (resultType->isIntegral() ||
-      isa<PointerType>(resultType) ||
-      isa<FunctionType>(resultType) ||
-      resultType == Type::LabelTy ||
-      resultType == Type::BoolTy)
-    {
-      opCode = ADD;
-    }
-  else
-    switch(resultType->getPrimitiveID())
-      {
-      case Type::FloatTyID:  opCode = FADDS; break;
-      case Type::DoubleTyID: opCode = FADDD; break;
-      default: assert(0 && "Invalid type for ADD instruction"); break; 
-      }
-  
-  return opCode;
+  // Create a temporary to represent the FP register into which the
+  // int value will be placed after conversion.  The type of this temporary
+  // depends on the type of FP register to use: single-prec for a 32-bit
+  // int or smaller; double-prec for a 64-bit int.
+  // 
+  size_t destSize = target.DataLayout.getTypeSize(destI->getType());
+  const Type* destTypeToUse = (destSize > 4)? Type::DoubleTy : Type::FloatTy;
+  TmpInstruction* destForCast = new TmpInstruction(destTypeToUse, opVal);
+  mcfi.addTemp(destForCast);
+
+  // Create the fp-to-int conversion code
+  MachineInstr* M =CreateConvertFPToIntInstr(destI->getType()->getPrimitiveID(),
+                                             opVal, destForCast);
+  mvec.push_back(M);
+
+  // Create the fpreg-to-intreg copy code
+  target.getInstrInfo().
+    CreateCodeToCopyFloatToInt(target, destI->getParent()->getParent(),
+                               destForCast, destI, mvec, mcfi);
 }
 
 
@@ -385,14 +387,11 @@ CreateAddConstInstruction(const InstructionNode* instrNode)
   // (1) Add with 0 for float or double: use an FMOV of appropriate type,
   //    instead of an FADD (1 vs 3 cycles).  There is no integer MOV.
   // 
-  const Type* resultType = instrNode->getInstruction()->getType();
-  
-  if (resultType == Type::FloatTy ||
-      resultType == Type::DoubleTy)
-    {
-      double dval = cast<ConstantFP>(constOp)->getValue();
+  if (ConstantFP *FPC = dyn_cast<ConstantFP>(constOp)) {
+      double dval = FPC->getValue();
       if (dval == 0.0)
-        minstr = CreateMovFloatInstruction(instrNode, resultType);
+        minstr = CreateMovFloatInstruction(instrNode,
+                                   instrNode->getInstruction()->getType());
     }
   
   return minstr;
@@ -404,8 +403,7 @@ ChooseSubInstructionByType(const Type* resultType)
 {
   MachineOpCode opCode = INVALID_OPCODE;
   
-  if (resultType->isIntegral() ||
-      resultType->isPointerType())
+  if (resultType->isInteger() || isa<PointerType>(resultType))
     {
       opCode = SUB;
     }
@@ -433,15 +431,12 @@ CreateSubConstInstruction(const InstructionNode* instrNode)
   // (1) Sub with 0 for float or double: use an FMOV of appropriate type,
   //    instead of an FSUB (1 vs 3 cycles).  There is no integer MOV.
   // 
-  const Type* resultType = instrNode->getInstruction()->getType();
-  
-  if (resultType == Type::FloatTy ||
-      resultType == Type::DoubleTy)
-    {
-      double dval = cast<ConstantFP>(constOp)->getValue();
-      if (dval == 0.0)
-        minstr = CreateMovFloatInstruction(instrNode, resultType);
-    }
+  if (ConstantFP *FPC = dyn_cast<ConstantFP>(constOp)) {
+    double dval = FPC->getValue();
+    if (dval == 0.0)
+      minstr = CreateMovFloatInstruction(instrNode,
+                                        instrNode->getInstruction()->getType());
+  }
   
   return minstr;
 }
@@ -486,7 +481,7 @@ ChooseMulInstructionByType(const Type* resultType)
 {
   MachineOpCode opCode = INVALID_OPCODE;
   
-  if (resultType->isIntegral())
+  if (resultType->isInteger())
     opCode = MULX;
   else
     switch(resultType->getPrimitiveID())
@@ -513,19 +508,70 @@ CreateIntNegInstruction(const TargetMachine& target,
 }
 
 
+// Create instruction sequence for any shift operation.
+// SLL or SLLX on an operand smaller than the integer reg. size (64 bits)
+// requires a second instruction for explicit sign-extension.
+// Note that we only have to worry about a sign-bit appearing in the
+// most significant bit of the operand after shifting (e.g., bit 32 of
+// Int or bit 16 of Short), so we do not have to worry about results
+// that are as large as a normal integer register.
+// 
+static inline void
+CreateShiftInstructions(const TargetMachine& target,
+                        Function* F,
+                        MachineOpCode shiftOpCode,
+                        Value* argVal1,
+                        Value* optArgVal2, /* Use optArgVal2 if not NULL */
+                        unsigned int optShiftNum, /* else use optShiftNum */
+                        Instruction* destVal,
+                        vector<MachineInstr*>& mvec,
+                        MachineCodeForInstruction& mcfi)
+{
+  assert((optArgVal2 != NULL || optShiftNum <= 64) &&
+         "Large shift sizes unexpected, but can be handled below: "
+         "You need to check whether or not it fits in immed field below");
+  
+  // If this is a logical left shift of a type smaller than the standard
+  // integer reg. size, we have to extend the sign-bit into upper bits
+  // of dest, so we need to put the result of the SLL into a temporary.
+  // 
+  Value* shiftDest = destVal;
+  unsigned opSize = target.DataLayout.getTypeSize(argVal1->getType());
+  if ((shiftOpCode == SLL || shiftOpCode == SLLX)
+      && opSize < target.DataLayout.getIntegerRegize())
+    { // put SLL result into a temporary
+      shiftDest = new TmpInstruction(argVal1, optArgVal2, "sllTmp");
+      mcfi.addTemp(shiftDest);
+    }
+  
+  MachineInstr* M = (optArgVal2 != NULL)
+    ? Create3OperandInstr(shiftOpCode, argVal1, optArgVal2, shiftDest)
+    : Create3OperandInstr_UImmed(shiftOpCode, argVal1, optShiftNum, shiftDest);
+  mvec.push_back(M);
+  
+  if (shiftDest != destVal)
+    { // extend the sign-bit of the result into all upper bits of dest
+      assert(8*opSize <= 32 && "Unexpected type size > 4 and < IntRegSize?");
+      target.getInstrInfo().
+        CreateSignExtensionInstructions(target, F, shiftDest, destVal,
+                                        8*opSize, mvec, mcfi);
+    }
+}
+
+
 // Does not create any instructions if we cannot exploit constant to
 // create a cheaper instruction.
 // This returns the approximate cost of the instructions generated,
 // which is used to pick the cheapest when both operands are constant.
 static inline unsigned int
-CreateMulConstInstruction(const TargetMachine &target,
-                          Value* lval, Value* rval, Value* destVal,
-                          vector<MachineInstr*>& mvec)
+CreateMulConstInstruction(const TargetMachine &target, Function* F,
+                          Value* lval, Value* rval, Instruction* destVal,
+                          vector<MachineInstr*>& mvec,
+                          MachineCodeForInstruction& mcfi)
 {
-  /* An integer multiply is generally more costly than FP multiply */ 
+  /* Use max. multiply cost, viz., cost of MULX */
   unsigned int cost = target.getInstrInfo().minLatency(MULX);
-  MachineInstr* minstr1 = NULL;
-  MachineInstr* minstr2 = NULL;
+  unsigned int firstNewInstr = mvec.size();
   
   Value* constOp = rval;
   if (! isa<Constant>(constOp))
@@ -537,13 +583,13 @@ CreateMulConstInstruction(const TargetMachine &target,
   // 
   const Type* resultType = destVal->getType();
   
-  if (resultType->isIntegral() || resultType->isPointerType())
+  if (resultType->isInteger() || isa<PointerType>(resultType))
     {
-      unsigned pow;
       bool isValidConst;
       int64_t C = GetConstantValueAsSignedInt(constOp, isValidConst);
       if (isValidConst)
         {
+          unsigned pow;
           bool needNeg = false;
           if (C < 0)
             {
@@ -554,69 +600,52 @@ CreateMulConstInstruction(const TargetMachine &target,
           if (C == 0 || C == 1)
             {
               cost = target.getInstrInfo().minLatency(ADD);
-              minstr1 = new MachineInstr(ADD);
-              if (C == 0)
-                minstr1->SetMachineOperandReg(0,
-                              target.getRegInfo().getZeroRegNum());
-              else
-                minstr1->SetMachineOperandVal(0,
-                              MachineOperand::MO_VirtualRegister, lval);
-              minstr1->SetMachineOperandReg(1,
-                                        target.getRegInfo().getZeroRegNum());
+              MachineInstr* M = (C == 0)
+                ? Create3OperandInstr_Reg(ADD,
+                                          target.getRegInfo().getZeroRegNum(),
+                                          target.getRegInfo().getZeroRegNum(),
+                                          destVal)
+                : Create3OperandInstr_Reg(ADD, lval,
+                                          target.getRegInfo().getZeroRegNum(),
+                                          destVal);
+              mvec.push_back(M);
             }
-          else if (IsPowerOf2(C, pow))
+          else if (isPowerOf2(C, pow))
             {
-              minstr1 = new MachineInstr((resultType == Type::LongTy)
-                                         ? SLLX : SLL);
-              minstr1->SetMachineOperandVal(0,
-                                MachineOperand::MO_VirtualRegister, lval);
-              minstr1->SetMachineOperandConst(1,
-                                MachineOperand::MO_UnextendedImmed, pow);
+              unsigned int opSize = target.DataLayout.getTypeSize(resultType);
+              MachineOpCode opCode = (opSize <= 32)? SLL : SLLX;
+              CreateShiftInstructions(target, F, opCode, lval, NULL, pow,
+                                      destVal, mvec, mcfi); 
             }
           
-          if (minstr1 && needNeg)
+          if (mvec.size() > 0 && needNeg)
             { // insert <reg = SUB 0, reg> after the instr to flip the sign
-              minstr2 = CreateIntNegInstruction(target, destVal);
-              cost += target.getInstrInfo().minLatency(minstr2->getOpCode());
+              MachineInstr* M = CreateIntNegInstruction(target, destVal);
+              mvec.push_back(M);
             }
         }
     }
   else
     {
-      if (resultType == Type::FloatTy ||
-          resultType == Type::DoubleTy)
+      if (ConstantFP *FPC = dyn_cast<ConstantFP>(constOp))
         {
-          double dval = cast<ConstantFP>(constOp)->getValue();
+          double dval = FPC->getValue();
           if (fabs(dval) == 1)
             {
-              bool needNeg = (dval < 0);
-              
-              MachineOpCode opCode = needNeg
+              MachineOpCode opCode =  (dval < 0)
                 ? (resultType == Type::FloatTy? FNEGS : FNEGD)
                 : (resultType == Type::FloatTy? FMOVS : FMOVD);
-              
-              minstr1 = new MachineInstr(opCode);
-              minstr1->SetMachineOperandVal(0,
-                                            MachineOperand::MO_VirtualRegister,
-                                            lval);
+              MachineInstr* M = Create2OperandInstr(opCode, lval, destVal);
+              mvec.push_back(M);
             } 
         }
     }
   
-  if (minstr1 != NULL)
-    minstr1->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister,
-                                  destVal);   
-  
-  if (minstr1)
-    {
-      mvec.push_back(minstr1);
-      cost = target.getInstrInfo().minLatency(minstr1->getOpCode());
-    }
-  if (minstr2)
+  if (firstNewInstr < mvec.size())
     {
-      assert(minstr1 && "Otherwise cost needs to be initialized to 0");
-      cost += target.getInstrInfo().minLatency(minstr2->getOpCode());
-      mvec.push_back(minstr2);
+      cost = 0;
+      for (unsigned int i=firstNewInstr; i < mvec.size(); ++i)
+        cost += target.getInstrInfo().minLatency(mvec[i]->getOpCode());
     }
   
   return cost;
@@ -628,28 +657,23 @@ CreateMulConstInstruction(const TargetMachine &target,
 // 
 static inline void
 CreateCheapestMulConstInstruction(const TargetMachine &target,
-                                  Value* lval, Value* rval, Value* destVal,
-                                  vector<MachineInstr*>& mvec)
+                                  Function* F,
+                                  Value* lval, Value* rval,
+                                  Instruction* destVal,
+                                  vector<MachineInstr*>& mvec,
+                                  MachineCodeForInstruction& mcfi)
 {
   Value* constOp;
   if (isa<Constant>(lval) && isa<Constant>(rval))
-    { // both operands are constant: try both orders!
-      vector<MachineInstr*> mvec1, mvec2;
-      unsigned int lcost = CreateMulConstInstruction(target, lval, rval,
-                                                     destVal, mvec1);
-      unsigned int rcost = CreateMulConstInstruction(target, rval, lval,
-                                                     destVal, mvec2);
-      vector<MachineInstr*>& mincostMvec =  (lcost <= rcost)? mvec1 : mvec2;
-      vector<MachineInstr*>& maxcostMvec =  (lcost <= rcost)? mvec2 : mvec1;
-      mvec.insert(mvec.end(), mincostMvec.begin(), mincostMvec.end()); 
-
-      for (unsigned int i=0; i < maxcostMvec.size(); ++i)
-        delete maxcostMvec[i];
+    { // both operands are constant: evaluate and "set" in dest
+      Constant* P = ConstantFoldBinaryInstruction(Instruction::Mul,
+                                  cast<Constant>(lval), cast<Constant>(rval));
+      target.getInstrInfo().CreateCodeToLoadConst(target,F,P,destVal,mvec,mcfi);
     }
   else if (isa<Constant>(rval))         // rval is constant, but not lval
-    CreateMulConstInstruction(target, lval, rval, destVal, mvec);
+    CreateMulConstInstruction(target, F, lval, rval, destVal, mvec, mcfi);
   else if (isa<Constant>(lval))         // lval is constant, but not rval
-    CreateMulConstInstruction(target, lval, rval, destVal, mvec);
+    CreateMulConstInstruction(target, F, lval, rval, destVal, mvec, mcfi);
   
   // else neither is constant
   return;
@@ -657,13 +681,14 @@ CreateCheapestMulConstInstruction(const TargetMachine &target,
 
 // Return NULL if we cannot exploit constant to create a cheaper instruction
 static inline void
-CreateMulInstruction(const TargetMachine &target,
-                     Value* lval, Value* rval, Value* destVal,
+CreateMulInstruction(const TargetMachine &target, Function* F,
+                     Value* lval, Value* rval, Instruction* destVal,
                      vector<MachineInstr*>& mvec,
+                     MachineCodeForInstruction& mcfi,
                      MachineOpCode forceMulOp = INVALID_MACHINE_OPCODE)
 {
   unsigned int L = mvec.size();
-  CreateCheapestMulConstInstruction(target, lval, rval, destVal, mvec);
+  CreateCheapestMulConstInstruction(target,F, lval, rval, destVal, mvec, mcfi);
   if (mvec.size() == L)
     { // no instructions were added so create MUL reg, reg, reg.
       // Use FSMULD if both operands are actually floats cast to doubles.
@@ -692,7 +717,7 @@ ChooseDivInstruction(TargetMachine &target,
   
   const Type* resultType = instrNode->getInstruction()->getType();
   
-  if (resultType->isIntegral())
+  if (resultType->isInteger())
     opCode = resultType->isSigned()? SDIVX : UDIVX;
   else
     switch(resultType->getPrimitiveID())
@@ -725,7 +750,7 @@ CreateDivConstInstruction(TargetMachine &target,
   // 
   const Type* resultType = instrNode->getInstruction()->getType();
   
-  if (resultType->isIntegral())
+  if (resultType->isInteger())
     {
       unsigned pow;
       bool isValidConst;
@@ -748,7 +773,7 @@ CreateDivConstInstruction(TargetMachine &target,
               minstr1->SetMachineOperandReg(1,
                                         target.getRegInfo().getZeroRegNum());
             }
-          else if (IsPowerOf2(C, pow))
+          else if (isPowerOf2(C, pow))
             {
               MachineOpCode opCode= ((resultType->isSigned())
                                      ? (resultType==Type::LongTy)? SRAX : SRA
@@ -771,10 +796,9 @@ CreateDivConstInstruction(TargetMachine &target,
     }
   else
     {
-      if (resultType == Type::FloatTy ||
-          resultType == Type::DoubleTy)
+      if (ConstantFP *FPC = dyn_cast<ConstantFP>(constOp))
         {
-          double dval = cast<ConstantFP>(constOp)->getValue();
+          double dval = FPC->getValue();
           if (fabs(dval) == 1)
             {
               bool needNeg = (dval < 0);
@@ -810,7 +834,8 @@ CreateCodeForVariableSizeAlloca(const TargetMachine& target,
                                 vector<MachineInstr*>& getMvec)
 {
   MachineInstr* M;
-  
+  MachineCodeForInstruction& mcfi = MachineCodeForInstruction::get(result);
+
   // Create a Value to hold the (constant) element size
   Value* tsizeVal = ConstantSInt::get(Type::IntTy, tsize);
 
@@ -827,14 +852,11 @@ CreateCodeForVariableSizeAlloca(const TargetMachine& target,
 
   // Create a temporary value to hold the result of MUL
   TmpInstruction* tmpProd = new TmpInstruction(numElementsVal, tsizeVal);
-  MachineCodeForInstruction::get(result).addTemp(tmpProd);
+  mcfi.addTemp(tmpProd);
   
   // Instruction 1: mul numElements, typeSize -> tmpProd
-  M = new MachineInstr(MULX);
-  M->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister, numElementsVal);
-  M->SetMachineOperandVal(1, MachineOperand::MO_VirtualRegister, tsizeVal);
-  M->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister, tmpProd);
-  getMvec.push_back(M);
+  CreateMulInstruction(target, F, numElementsVal, tsizeVal, tmpProd, getMvec,
+                       mcfi, INVALID_MACHINE_OPCODE);
         
   // Instruction 2: sub %sp, tmpProd -> %sp
   M = new MachineInstr(SUB);
@@ -859,6 +881,7 @@ CreateCodeForFixedSizeAlloca(const TargetMachine& target,
                              unsigned int numElements,
                              vector<MachineInstr*>& getMvec)
 {
+  assert(tsize > 0 && "Illegal (zero) type size for alloca");
   assert(result && result->getParent() &&
          "Result value is not part of a function?");
   Function *F = result->getParent()->getParent();
@@ -897,7 +920,6 @@ CreateCodeForFixedSizeAlloca(const TargetMachine& target,
 }
 
 
-
 //------------------------------------------------------------------------ 
 // Function SetOperandsForMemInstr
 //
@@ -916,131 +938,84 @@ CreateCodeForFixedSizeAlloca(const TargetMachine& target,
 
 static void
 SetOperandsForMemInstr(vector<MachineInstr*>& mvec,
-                       vector<MachineInstr*>::iterator mvecI,
-                       const InstructionNode* vmInstrNode,
+                       InstructionNode* vmInstrNode,
                        const TargetMachine& target)
 {
-  MemAccessInst* memInst = (MemAccessInst*) vmInstrNode->getInstruction();
-  
-  // Variables to hold the index vector, ptr value, and offset value.
-  // The major work here is to extract these for all 3 instruction types
-  // and then call the common function SetMemOperands_Internal().
-  // 
-  Value* ptrVal = memInst->getPointerOperand();
-  
-  // Start with the index vector of this instruction, if any.
-  vector<Value*> idxVec;
-  idxVec.insert(idxVec.end(), memInst->idx_begin(), memInst->idx_end());
-  
-  // If there is a GetElemPtr instruction to fold in to this instr,
-  // it must be in the left child for Load and GetElemPtr, and in the
-  // right child for Store instructions.
-  InstrTreeNode* ptrChild = (vmInstrNode->getOpLabel() == Instruction::Store
-                             ? vmInstrNode->rightChild()
-                             : vmInstrNode->leftChild()); 
-  
-  // Fold chains of GetElemPtr instructions for structure references.
-  if (isa<StructType>(cast<PointerType>(ptrVal->getType())->getElementType())
-      && (ptrChild->getOpLabel() == Instruction::GetElementPtr ||
-          ptrChild->getOpLabel() == GetElemPtrIdx))
-    {
-      Value* newPtr = FoldGetElemChain((InstructionNode*) ptrChild, idxVec);
-      if (newPtr)
-        ptrVal = newPtr;
-    }
-  
-  SetMemOperands_Internal(mvec, mvecI, vmInstrNode, ptrVal, idxVec, target);
-}
+  Instruction* memInst = vmInstrNode->getInstruction();
+  vector<MachineInstr*>::iterator mvecI = mvec.end() - 1;
 
+  // Index vector, ptr value, and flag if all indices are const.
+  vector<Value*> idxVec;
+  bool allConstantIndices;
+  Value* ptrVal = GetMemInstArgs(vmInstrNode, idxVec, allConstantIndices);
 
-// Generate the correct operands (and additional instructions if needed)
-// for the given pointer and given index vector.
-//
-static void
-SetMemOperands_Internal(vector<MachineInstr*>& mvec,
-                        vector<MachineInstr*>::iterator mvecI,
-                        const InstructionNode* vmInstrNode,
-                        Value* ptrVal,
-                        vector<Value*>& idxVec,
-                        const TargetMachine& target)
-{
-  MemAccessInst* memInst = (MemAccessInst*) vmInstrNode->getInstruction();
-  
-  // Initialize so we default to storing the offset in a register.
+  // Now create the appropriate operands for the machine instruction.
+  // First, initialize so we default to storing the offset in a register.
   int64_t smallConstOffset = 0;
   Value* valueForRegOffset = NULL;
-  MachineOperand::MachineOperandType offsetOpType =MachineOperand::MO_VirtualRegister;
+  MachineOperand::MachineOperandType offsetOpType =
+    MachineOperand::MO_VirtualRegister;
 
   // Check if there is an index vector and if so, compute the
   // right offset for structures and for arrays 
   // 
-  if (idxVec.size() > 0)
+  if (!idxVec.empty())
     {
-      unsigned offset = 0;
-      
       const PointerType* ptrType = cast<PointerType>(ptrVal->getType());
       
-      // Handle special common case of leading [0] index.
-      bool firstIndexIsZero =
-        bool(isa<ConstantUInt>(idxVec.front()) &&
-             cast<ConstantUInt>(idxVec.front())->getValue() == 0);
-      
-      // This is a real structure reference if the ptr target is a
-      // structure type, and the first offset is [0] (eliminate that offset).
-      if (firstIndexIsZero && ptrType->getElementType()->isStructType())
+      // If all indices are constant, compute the combined offset directly.
+      if (allConstantIndices)
         {
           // Compute the offset value using the index vector. Create a
           // virtual reg. for it since it may not fit in the immed field.
-          assert(idxVec.size() >= 2);
-          idxVec.erase(idxVec.begin());
-          unsigned offset = target.DataLayout.getIndexedOffset(ptrType,idxVec);
-          valueForRegOffset = ConstantSInt::get(Type::IntTy, offset);
+          uint64_t offset = target.DataLayout.getIndexedOffset(ptrType,idxVec);
+          valueForRegOffset = ConstantSInt::get(Type::LongTy, offset);
         }
       else
         {
-          // It is an array ref, and must have been lowered to a single offset.
-          assert((memInst->getNumOperands()
-                  == (unsigned) 1 + memInst->getFirstIndexOperandNumber())
+          // There is at least one non-constant offset.  Therefore, this must
+          // be an array ref, and must have been lowered to a single non-zero
+          // offset.  (An extra leading zero offset, if any, can be ignored.)
+          // Generate code sequence to compute address from index.
+          // 
+          bool firstIdxIsZero =
+            (idxVec[0] == Constant::getNullValue(idxVec[0]->getType()));
+          assert(idxVec.size() == 1U + firstIdxIsZero 
                  && "Array refs must be lowered before Instruction Selection");
-          
-          Value* arrayOffsetVal =  * memInst->idx_begin();
-          
-          // If index is 0, the offset value is just 0.  Otherwise, 
-          // generate a MUL instruction to compute address from index.
+
+          Value* idxVal = idxVec[firstIdxIsZero];
+
+          vector<MachineInstr*> mulVec;
+          Instruction* addr = new TmpInstruction(Type::ULongTy, memInst);
+          MachineCodeForInstruction::get(memInst).addTemp(addr);
+
+          // Get the array type indexed by idxVal, and compute its element size.
           // The call to getTypeSize() will fail if size is not constant.
+          const Type* vecType = (firstIdxIsZero
+                                 ? GetElementPtrInst::getIndexedType(ptrType,
+                                           std::vector<Value*>(1U, idxVec[0]),
+                                           /*AllowCompositeLeaf*/ true)
+                                 : ptrType);
+          const Type* eltType = cast<SequentialType>(vecType)->getElementType();
+          ConstantUInt* eltSizeVal = ConstantUInt::get(Type::ULongTy,
+                                       target.DataLayout.getTypeSize(eltType));
+
           // CreateMulInstruction() folds constants intelligently enough.
-          // 
-          if (firstIndexIsZero)
-            {
-              offsetOpType = MachineOperand::MO_SignExtendedImmed;
-              smallConstOffset = 0;
-            }
-          else
-            {
-              vector<MachineInstr*> mulVec;
-              Instruction* addr = new TmpInstruction(Type::UIntTy, memInst);
-              MachineCodeForInstruction::get(memInst).addTemp(addr);
-              
-              unsigned int eltSize =
-                target.DataLayout.getTypeSize(ptrType->getElementType());
-              assert(eltSize > 0 && "Invalid or non-const array element size");
-              ConstantUInt* eltVal = ConstantUInt::get(Type::UIntTy, eltSize);
-              
-              CreateMulInstruction(target,
-                                   arrayOffsetVal, /* lval, not likely const */
-                                   eltVal,         /* rval, likely constant */
-                                   addr,           /* result*/
-                                   mulVec, INVALID_MACHINE_OPCODE);
-              assert(mulVec.size() > 0 && "No multiply instruction created?");
-              for (vector<MachineInstr*>::const_iterator I = mulVec.begin();
-                   I != mulVec.end(); ++I)
-                {
-                  mvecI = mvec.insert(mvecI, *I);   // ptr to inserted value
-                  ++mvecI;                          // ptr to mem. instr.
-                }
-              
-              valueForRegOffset = addr;
-            }
+          CreateMulInstruction(target, memInst->getParent()->getParent(),
+                               idxVal,         /* lval, not likely to be const*/
+                               eltSizeVal,     /* rval, likely to be constant */
+                               addr,           /* result */
+                               mulVec, MachineCodeForInstruction::get(memInst),
+                               INVALID_MACHINE_OPCODE);
+
+          // Insert mulVec[] before *mvecI in mvec[] and update mvecI
+          // to point to the same instruction it pointed to before.
+          assert(mulVec.size() > 0 && "No multiply code created?");
+          vector<MachineInstr*>::iterator oldMvecI = mvecI;
+          for (unsigned i=0, N=mulVec.size(); i < N; ++i)
+            mvecI = mvec.insert(mvecI, mulVec[i]) + 1;  // pts to mem instr
+
+          valueForRegOffset = addr;
         }
     }
   else
@@ -1048,7 +1023,7 @@ SetMemOperands_Internal(vector<MachineInstr*>& mvec,
       offsetOpType = MachineOperand::MO_SignExtendedImmed;
       smallConstOffset = 0;
     }
-  
+
   // For STORE:
   //   Operand 0 is value, operand 1 is ptr, operand 2 is offset
   // For LOAD or GET_ELEMENT_PTR,
@@ -1125,7 +1100,6 @@ ForwardOperand(InstructionNode* treeNode,
     }
   else
     {
-      bool fwdSuccessful = false;
       for (unsigned i=0, N=mvec.size(); i < N; i++)
         {
           MachineInstr* minstr = mvec[i];
@@ -1134,94 +1108,33 @@ ForwardOperand(InstructionNode* treeNode,
               const MachineOperand& mop = minstr->getOperand(i);
               if (mop.getOperandType() == MachineOperand::MO_VirtualRegister &&
                   mop.getVRegValue() == unusedOp)
-                {
-                  minstr->SetMachineOperandVal(i,
+                minstr->SetMachineOperandVal(i,
                                 MachineOperand::MO_VirtualRegister, fwdOp);
-                  fwdSuccessful = true;
-                }
             }
           
           for (unsigned i=0,numOps=minstr->getNumImplicitRefs(); i<numOps; ++i)
             if (minstr->getImplicitRef(i) == unusedOp)
-              {
-                minstr->setImplicitRef(i, fwdOp,
-                                       minstr->implicitRefIsDefined(i));
-                fwdSuccessful = true;
-              }
+              minstr->setImplicitRef(i, fwdOp,
+                                     minstr->implicitRefIsDefined(i),
+                                     minstr->implicitRefIsDefinedAndUsed(i));
         }
-      assert(fwdSuccessful && "Value to be forwarded is never used!");
     }
 }
 
 
-void UltraSparcInstrInfo::
-CreateCopyInstructionsByType(const TargetMachine& target,
-                             Function *F,
-                             Value* src,
-                             Instruction* dest,
-                             vector<MachineInstr*>& minstrVec) const
+inline bool
+AllUsesAreBranches(const Instruction* setccI)
 {
-  bool loadConstantToReg = false;
-  
-  const Type* resultType = dest->getType();
-  
-  MachineOpCode opCode = ChooseAddInstructionByType(resultType);
-  if (opCode == INVALID_OPCODE)
-    {
-      assert(0 && "Unsupported result type in CreateCopyInstructionsByType()");
-      return;
-    }
-  
-  // if `src' is a constant that doesn't fit in the immed field or if it is
-  // a global variable (i.e., a constant address), generate a load
-  // instruction instead of an add
-  // 
-  if (isa<Constant>(src))
-    {
-      unsigned int machineRegNum;
-      int64_t immedValue;
-      MachineOperand::MachineOperandType opType =
-        ChooseRegOrImmed(src, opCode, target, /*canUseImmed*/ true,
-                         machineRegNum, immedValue);
-      
-      if (opType == MachineOperand::MO_VirtualRegister)
-        loadConstantToReg = true;
-    }
-  else if (isa<GlobalValue>(src))
-    loadConstantToReg = true;
-  
-  if (loadConstantToReg)
-    { // `src' is constant and cannot fit in immed field for the ADD
-      // Insert instructions to "load" the constant into a register
-      vector<TmpInstruction*> tempVec;
-      target.getInstrInfo().CreateCodeToLoadConst(F, src, dest,
-                                                  minstrVec, tempVec);
-      for (unsigned i=0; i < tempVec.size(); i++)
-        MachineCodeForInstruction::get(dest).addTemp(tempVec[i]);
-    }
-  else
-    { // Create an add-with-0 instruction of the appropriate type.
-      // Make `src' the second operand, in case it is a constant
-      // Use (unsigned long) 0 for a NULL pointer value.
-      // 
-      const Type* zeroValueType =
-        (resultType->getPrimitiveID() == Type::PointerTyID)? Type::ULongTy
-                                                           : resultType;
-      MachineInstr* minstr = new MachineInstr(opCode);
-      minstr->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister,
-                                   Constant::getNullConstant(zeroValueType));
-      minstr->SetMachineOperandVal(1, MachineOperand::MO_VirtualRegister, src);
-      minstr->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister,dest);
-      minstrVec.push_back(minstr);
-    }
+  for (Value::use_const_iterator UI=setccI->use_begin(), UE=setccI->use_end();
+       UI != UE; ++UI)
+    if (! isa<TmpInstruction>(*UI)     // ignore tmp instructions here
+        && cast<Instruction>(*UI)->getOpcode() != Instruction::Br)
+      return false;
+  return true;
 }
 
-
-
 //******************* Externally Visible Functions *************************/
 
-
-
 //------------------------------------------------------------------------ 
 // External Function: ThisIsAChainRule
 //
@@ -1235,7 +1148,6 @@ ThisIsAChainRule(int eruleno)
   switch(eruleno)
     {
     case 111:  // stmt:  reg
-    case 113:  // stmt:  bool
     case 123:
     case 124:
     case 125:
@@ -1254,9 +1166,10 @@ ThisIsAChainRule(int eruleno)
     case 242:
     case 243:
     case 244:
+    case 245:
     case 321:
       return true; break;
-      
+
     default:
       return false; break;
     }
@@ -1279,6 +1192,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
                       vector<MachineInstr*>& mvec)
 {
   bool checkCast = false;              // initialize here to use fall-through
+  bool maskUnsignedResult = false;
   int nextRule;
   int forwardOperandNum = -1;
   unsigned int allocaSize = 0;
@@ -1346,14 +1260,12 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         mvec.push_back(new MachineInstr(
                          ChooseStoreInstruction(
                             subtreeRoot->leftChild()->getValue()->getType())));
-        SetOperandsForMemInstr(mvec, mvec.end()-1, subtreeRoot, target);
+        SetOperandsForMemInstr(mvec, subtreeRoot, target);
         break;
 
       case 5:  // stmt:   BrUncond
         M = new MachineInstr(BA);
-        M->SetMachineOperandVal(0, MachineOperand::MO_CCRegister,
-                                      (Value*)NULL);
-        M->SetMachineOperandVal(1, MachineOperand::MO_PCRelativeDisp,
+        M->SetMachineOperandVal(0, MachineOperand::MO_PCRelativeDisp,
              cast<BranchInst>(subtreeRoot->getInstruction())->getSuccessor(0));
         mvec.push_back(M);
         
@@ -1373,8 +1285,8 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         Constant *constVal = cast<Constant>(constNode->getValue());
         bool isValidConst;
         
-        if ((constVal->getType()->isIntegral()
-             || constVal->getType()->isPointerType())
+        if ((constVal->getType()->isInteger()
+             || isa<PointerType>(constVal->getType()))
             && GetConstantValueAsSignedInt(constVal, isValidConst) == 0
             && isValidConst)
           {
@@ -1400,9 +1312,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
 
             // false branch
             M = new MachineInstr(BA);
-            M->SetMachineOperandVal(0, MachineOperand::MO_CCRegister,
-                                    (Value*) NULL);
-            M->SetMachineOperandVal(1, MachineOperand::MO_PCRelativeDisp,
+            M->SetMachineOperandVal(0, MachineOperand::MO_PCRelativeDisp,
                                     brInst->getSuccessor(1));
             mvec.push_back(M);
             
@@ -1414,18 +1324,16 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // ELSE FALL THROUGH
       }
 
-      case 6:  // stmt:   BrCond(bool)
-      { // bool => boolean was computed with some boolean operator
-        // (SetCC, Not, ...).  We need to check whether the type was a FP,
-        // signed int or unsigned int, and check the branching condition in
-        // order to choose the branch to use.
+      case 6:  // stmt:   BrCond(setCC)
+      { // bool => boolean was computed with SetCC.
+        // The branch to use depends on whether it is FP, signed, or unsigned.
         // If it is an integer CC, we also need to find the unique
         // TmpInstruction representing that CC.
         // 
         BranchInst* brInst = cast<BranchInst>(subtreeRoot->getInstruction());
         bool isFPBranch;
         M = new MachineInstr(ChooseBccInstruction(subtreeRoot, isFPBranch));
-        
+
         Value* ccValue = GetTmpForCC(subtreeRoot->leftChild()->getValue(),
                                      brInst->getParent()->getParent(),
                                      isFPBranch? Type::FloatTy : Type::IntTy);
@@ -1434,18 +1342,16 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         M->SetMachineOperandVal(1, MachineOperand::MO_PCRelativeDisp,
                                    brInst->getSuccessor(0));
         mvec.push_back(M);
-        
+
         // delay slot
         mvec.push_back(new MachineInstr(NOP));
-        
+
         // false branch
         M = new MachineInstr(BA);
-        M->SetMachineOperandVal(0, MachineOperand::MO_CCRegister,
-                                   (Value*) NULL);
-        M->SetMachineOperandVal(1, MachineOperand::MO_PCRelativeDisp,
+        M->SetMachineOperandVal(0, MachineOperand::MO_PCRelativeDisp,
                                    brInst->getSuccessor(1));
         mvec.push_back(M);
-        
+
         // delay slot
         mvec.push_back(new MachineInstr(NOP));
         break;
@@ -1459,10 +1365,8 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         unsigned dest = cast<ConstantBool>(constVal)->getValue()? 0 : 1;
         
         M = new MachineInstr(BA);
-        M->SetMachineOperandVal(0, MachineOperand::MO_CCRegister,
-                                (Value*) NULL);
-        M->SetMachineOperandVal(1, MachineOperand::MO_PCRelativeDisp,
-          ((BranchInst*) subtreeRoot->getInstruction())->getSuccessor(dest));
+        M->SetMachineOperandVal(0, MachineOperand::MO_PCRelativeDisp,
+          cast<BranchInst>(subtreeRoot->getInstruction())->getSuccessor(dest));
         mvec.push_back(M);
         
         // delay slot
@@ -1478,7 +1382,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         M->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister,
                                       subtreeRoot->leftChild()->getValue());
         M->SetMachineOperandVal(1, MachineOperand::MO_PCRelativeDisp,
-              ((BranchInst*) subtreeRoot->getInstruction())->getSuccessor(0));
+              cast<BranchInst>(subtreeRoot->getInstruction())->getSuccessor(0));
         mvec.push_back(M);
 
         // delay slot
@@ -1486,10 +1390,8 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
 
         // false branch
         M = new MachineInstr(BA);
-        M->SetMachineOperandVal(0, MachineOperand::MO_CCRegister,
-                                (Value*) NULL);
-        M->SetMachineOperandVal(1, MachineOperand::MO_PCRelativeDisp,
-              ((BranchInst*) subtreeRoot->getInstruction())->getSuccessor(1));
+        M->SetMachineOperandVal(0, MachineOperand::MO_PCRelativeDisp,
+              cast<BranchInst>(subtreeRoot->getInstruction())->getSuccessor(1));
         mvec.push_back(M);
         
         // delay slot
@@ -1505,115 +1407,139 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         assert(0 && "VRegList should never be the topmost non-chain rule");
         break;
 
-      case 21: // bool:  Not(bool):    Both these are implemented as:
-      case 421:        // reg:   BNot(reg) :        reg = reg XOR-NOT 0
-        M = new MachineInstr(XNOR);
-        M->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister,
-                                subtreeRoot->leftChild()->getValue());
-        M->SetMachineOperandReg(1, target.getRegInfo().getZeroRegNum());
-        M->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister,
-                                subtreeRoot->getValue());
-        mvec.push_back(M);
+      case 21: // bool:  Not(bool,reg): Both these are implemented as:
+      case 421:        // reg:   BNot(reg,reg):        reg = reg XOR-NOT 0
+      { // First find the unary operand. It may be left or right, usually right.
+        Value* notArg = BinaryOperator::getNotArgument(
+                           cast<BinaryOperator>(subtreeRoot->getInstruction()));
+        mvec.push_back(Create3OperandInstr_Reg(XNOR, notArg,
+                                          target.getRegInfo().getZeroRegNum(),
+                                          subtreeRoot->getValue()));
         break;
+      }
 
-      case 322:        // reg:   ToBoolTy(bool):
       case 22: // reg:   ToBoolTy(reg):
       {
         const Type* opType = subtreeRoot->leftChild()->getValue()->getType();
-        assert(opType->isIntegral() || opType->isPointerType()
-               || opType == Type::BoolTy);
+        assert(opType->isIntegral() || isa<PointerType>(opType));
         forwardOperandNum = 0;          // forward first operand to user
         break;
       }
       
       case 23: // reg:   ToUByteTy(reg)
+      case 24: // reg:   ToSByteTy(reg)
       case 25: // reg:   ToUShortTy(reg)
+      case 26: // reg:   ToShortTy(reg)
       case 27: // reg:   ToUIntTy(reg)
-      case 29: // reg:   ToULongTy(reg)
+      case 28: // reg:   ToIntTy(reg)
       {
-        const Type* opType = subtreeRoot->leftChild()->getValue()->getType();
-        assert(opType->isIntegral() ||
-               opType->isPointerType() ||
-               opType == Type::BoolTy && "Cast is illegal for other types");
-        forwardOperandNum = 0;          // forward first operand to user
+        //======================================================================
+        // Rules for integer conversions:
+        // 
+        //--------
+        // From ISO 1998 C++ Standard, Sec. 4.7:
+        //
+        // 2. If the destination type is unsigned, the resulting value is
+        // the least unsigned integer congruent to the source integer
+        // (modulo 2^n where n is the number of bits used to represent the
+        // unsigned type). [Note: In a two's complement representation,
+        // this conversion is conceptual and there is no change in the
+        // bit pattern (if there is no truncation). ]
+        // 
+        // 3. If the destination type is signed, the value is unchanged if
+        // it can be represented in the destination type (and bitfield width);
+        // otherwise, the value is implementation-defined.
+        //--------
+        // 
+        // Since we assume 2's complement representations, this implies:
+        // 
+        // -- if operand is smaller than destination, zero-extend or sign-extend
+        //    according to the signedness of the *operand*: source decides.
+        //    ==> we have to do nothing here!
+        // 
+        // -- if operand is same size as or larger than destination, and the
+        //    destination is *unsigned*, zero-extend the operand: dest. decides
+        // 
+        // -- if operand is same size as or larger than destination, and the
+        //    destination is *signed*, the choice is implementation defined:
+        //    we sign-extend the operand: i.e., again dest. decides.
+        //    Note: this matches both Sun's cc and gcc3.2.
+        //======================================================================
+
+        Instruction* destI =  subtreeRoot->getInstruction();
+        Value* opVal = subtreeRoot->leftChild()->getValue();
+        const Type* opType = opVal->getType();
+        if (opType->isIntegral() || isa<PointerType>(opType))
+          {
+            unsigned opSize = target.DataLayout.getTypeSize(opType);
+            unsigned destSize = target.DataLayout.getTypeSize(destI->getType());
+            if (opSize >= destSize)
+              { // Operand is same size as or larger than dest:
+                // zero- or sign-extend, according to the signedness of
+                // the destination (see above).
+                if (destI->getType()->isSigned())
+                  target.getInstrInfo().CreateSignExtensionInstructions(target,
+                    destI->getParent()->getParent(), opVal, destI, 8*destSize,
+                    mvec, MachineCodeForInstruction::get(destI));
+                else
+                  target.getInstrInfo().CreateZeroExtensionInstructions(target,
+                    destI->getParent()->getParent(), opVal, destI, 8*destSize,
+                    mvec, MachineCodeForInstruction::get(destI));
+              }
+            else
+              forwardOperandNum = 0;          // forward first operand to user
+          }
+        else if (opType->isFloatingPoint())
+          {
+            CreateCodeToConvertFloatToInt(target, opVal, destI, mvec,
+                                         MachineCodeForInstruction::get(destI));
+            if (destI->getType()->isUnsigned())
+              maskUnsignedResult = true; // not handled by fp->int code
+          }
+        else
+          assert(0 && "Unrecognized operand type for convert-to-unsigned");
+
         break;
       }
-      
-      case 24: // reg:   ToSByteTy(reg)
-      case 26: // reg:   ToShortTy(reg)
-      case 28: // reg:   ToIntTy(reg)
+
+      case 29: // reg:   ToULongTy(reg)
       case 30: // reg:   ToLongTy(reg)
       {
-        const Type* opType = subtreeRoot->leftChild()->getValue()->getType();
-        if (opType->isIntegral()
-            || opType->isPointerType()
-            || opType == Type::BoolTy)
+        Value* opVal = subtreeRoot->leftChild()->getValue();
+        const Type* opType = opVal->getType();
+        if (opType->isIntegral() || isa<PointerType>(opType))
+          forwardOperandNum = 0;          // forward first operand to user
+        else if (opType->isFloatingPoint())
           {
-            forwardOperandNum = 0;          // forward first operand to user
+            Instruction* destI =  subtreeRoot->getInstruction();
+            CreateCodeToConvertFloatToInt(target, opVal, destI, mvec,
+                                         MachineCodeForInstruction::get(destI));
           }
         else
-          {
-            // If the source operand is an FP type, the int result must be
-            // copied from float to int register via memory!
-            Instruction *dest = subtreeRoot->getInstruction();
-            Value* leftVal = subtreeRoot->leftChild()->getValue();
-            Value* destForCast;
-            vector<MachineInstr*> minstrVec;
-            
-            if (opType == Type::FloatTy || opType == Type::DoubleTy)
-              {
-                // Create a temporary to represent the INT register
-                // into which the FP value will be copied via memory.
-                // The type of this temporary will determine the FP
-                // register used: single-prec for a 32-bit int or smaller,
-                // double-prec for a 64-bit int.
-                // 
-                const Type* destTypeToUse =
-                  (dest->getType() == Type::LongTy)? Type::DoubleTy
-                                                   : Type::FloatTy;
-                destForCast = new TmpInstruction(destTypeToUse, leftVal);
-                MachineCodeForInstruction &destMCFI = 
-                  MachineCodeForInstruction::get(dest);
-                destMCFI.addTemp(destForCast);
-                
-                vector<TmpInstruction*> tempVec;
-                target.getInstrInfo().CreateCodeToCopyFloatToInt(
-                    dest->getParent()->getParent(),
-                    (TmpInstruction*) destForCast, dest,
-                    minstrVec, tempVec, target);
-                
-                for (unsigned i=0; i < tempVec.size(); ++i)
-                  destMCFI.addTemp(tempVec[i]);
-              }
-            else
-              destForCast = leftVal;
-            
-            M = CreateConvertToIntInstr(subtreeRoot->getOpLabel(),
-                                        leftVal, destForCast);
-            mvec.push_back(M);
-            
-            // Append the copy code, if any, after the conversion instr.
-            mvec.insert(mvec.end(), minstrVec.begin(), minstrVec.end());
-          }
+          assert(0 && "Unrecognized operand type for convert-to-signed");
         break;
-      }  
+      }
       
       case  31:        // reg:   ToFloatTy(reg):
       case  32:        // reg:   ToDoubleTy(reg):
       case 232:        // reg:   ToDoubleTy(Constant):
-        
+      
         // If this instruction has a parent (a user) in the tree 
         // and the user is translated as an FsMULd instruction,
         // then the cast is unnecessary.  So check that first.
         // In the future, we'll want to do the same for the FdMULq instruction,
         // so do the check here instead of only for ToFloatTy(reg).
         // 
-        if (subtreeRoot->parent() != NULL &&
-            MachineCodeForInstruction::get(((InstructionNode*)subtreeRoot->parent())->getInstruction())[0]->getOpCode() == FSMULD)
+        if (subtreeRoot->parent() != NULL)
           {
-            forwardOperandNum = 0;          // forward first operand to user
+            const MachineCodeForInstruction& mcfi =
+              MachineCodeForInstruction::get(
+                cast<InstructionNode>(subtreeRoot->parent())->getInstruction());
+            if (mcfi.size() == 0 || mcfi.front()->getOpCode() == FSMULD)
+              forwardOperandNum = 0;    // forward first operand to user
           }
-        else
+
+        if (forwardOperandNum != 0)     // we do need the cast
           {
             Value* leftVal = subtreeRoot->leftChild()->getValue();
             const Type* opType = leftVal->getType();
@@ -1630,7 +1556,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
                 Instruction *dest = subtreeRoot->getInstruction();
                 Value* srcForCast;
                 int n = 0;
-                if (opType != Type::FloatTy && opType != Type::DoubleTy)
+                if (! opType->isFloatingPoint())
                   {
                     // Create a temporary to represent the FP register
                     // into which the integer will be copied via memory.
@@ -1638,30 +1564,23 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
                     // register used: single-prec for a 32-bit int or smaller,
                     // double-prec for a 64-bit int.
                     // 
-                    const Type* srcTypeToUse =
-                      (leftVal->getType() == Type::LongTy)? Type::DoubleTy
-                                                          : Type::FloatTy;
-                    
-                    srcForCast = new TmpInstruction(srcTypeToUse, dest);
+                    uint64_t srcSize =
+                      target.DataLayout.getTypeSize(leftVal->getType());
+                    Type* tmpTypeToUse =
+                      (srcSize <= 4)? Type::FloatTy : Type::DoubleTy;
+                    srcForCast = new TmpInstruction(tmpTypeToUse, dest);
                     MachineCodeForInstruction &destMCFI = 
                       MachineCodeForInstruction::get(dest);
                     destMCFI.addTemp(srcForCast);
-                    
-                    vector<MachineInstr*> minstrVec;
-                    vector<TmpInstruction*> tempVec;
-                    target.getInstrInfo().CreateCodeToCopyIntToFloat(
+
+                    target.getInstrInfo().CreateCodeToCopyIntToFloat(target,
                          dest->getParent()->getParent(),
-                         leftVal, (TmpInstruction*) srcForCast,
-                         minstrVec, tempVec, target);
-                    
-                    mvec.insert(mvec.end(), minstrVec.begin(),minstrVec.end());
-                    
-                    for (unsigned i=0; i < tempVec.size(); ++i)
-                       destMCFI.addTemp(tempVec[i]);
+                         leftVal, cast<Instruction>(srcForCast),
+                         mvec, destMCFI);
                   }
                 else
                   srcForCast = leftVal;
-                
+
                 M = new MachineInstr(opCode);
                 M->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister,
                                            srcForCast);
@@ -1678,6 +1597,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         break;
 
       case 233:        // reg:   Add(reg, Constant)
+        maskUnsignedResult = true;
         M = CreateAddConstInstruction(subtreeRoot);
         if (M != NULL)
           {
@@ -1687,11 +1607,13 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // ELSE FALL THROUGH
         
       case 33: // reg:   Add(reg, reg)
+        maskUnsignedResult = true;
         mvec.push_back(new MachineInstr(ChooseAddInstruction(subtreeRoot)));
         Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
         break;
 
       case 234:        // reg:   Sub(reg, Constant)
+        maskUnsignedResult = true;
         M = CreateSubConstInstruction(subtreeRoot);
         if (M != NULL)
           {
@@ -1701,6 +1623,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // ELSE FALL THROUGH
         
       case 34: // reg:   Sub(reg, reg)
+        maskUnsignedResult = true;
         mvec.push_back(new MachineInstr(ChooseSubInstructionByType(
                                    subtreeRoot->getInstruction()->getType())));
         Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
@@ -1712,14 +1635,16 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
 
       case 35: // reg:   Mul(reg, reg)
       {
+        maskUnsignedResult = true;
         MachineOpCode forceOp = ((checkCast && BothFloatToDouble(subtreeRoot))
                                  ? FSMULD
                                  : INVALID_MACHINE_OPCODE);
-        CreateMulInstruction(target,
+        Instruction* mulInstr = subtreeRoot->getInstruction();
+        CreateMulInstruction(target, mulInstr->getParent()->getParent(),
                              subtreeRoot->leftChild()->getValue(),
                              subtreeRoot->rightChild()->getValue(),
-                             subtreeRoot->getInstruction(),
-                             mvec, forceOp);
+                             mulInstr, mvec,
+                             MachineCodeForInstruction::get(mulInstr),forceOp);
         break;
       }
       case 335:        // reg:   Mul(todouble, todoubleConst)
@@ -1728,17 +1653,21 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
 
       case 235:        // reg:   Mul(reg, Constant)
       {
+        maskUnsignedResult = true;
         MachineOpCode forceOp = ((checkCast && BothFloatToDouble(subtreeRoot))
                                  ? FSMULD
                                  : INVALID_MACHINE_OPCODE);
-        CreateMulInstruction(target,
+        Instruction* mulInstr = subtreeRoot->getInstruction();
+        CreateMulInstruction(target, mulInstr->getParent()->getParent(),
                              subtreeRoot->leftChild()->getValue(),
                              subtreeRoot->rightChild()->getValue(),
-                             subtreeRoot->getInstruction(),
-                             mvec, forceOp);
+                             mulInstr, mvec,
+                             MachineCodeForInstruction::get(mulInstr),
+                             forceOp);
         break;
       }
       case 236:        // reg:   Div(reg, Constant)
+        maskUnsignedResult = true;
         L = mvec.size();
         CreateDivConstInstruction(target, subtreeRoot, mvec);
         if (mvec.size() > L)
@@ -1746,6 +1675,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // ELSE FALL THROUGH
       
       case 36: // reg:   Div(reg, reg)
+        maskUnsignedResult = true;
         mvec.push_back(new MachineInstr(ChooseDivInstruction(target, subtreeRoot)));
         Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
         break;
@@ -1753,6 +1683,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
       case  37:        // reg:   Rem(reg, reg)
       case 237:        // reg:   Rem(reg, Constant)
       {
+        maskUnsignedResult = true;
         Instruction* remInstr = subtreeRoot->getInstruction();
         
         TmpInstruction* quot = new TmpInstruction(
@@ -1768,12 +1699,10 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         M->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister,quot);
         mvec.push_back(M);
         
-        M = new MachineInstr(ChooseMulInstructionByType(
-                                   subtreeRoot->getInstruction()->getType()));
-        M->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister,quot);
-        M->SetMachineOperandVal(1, MachineOperand::MO_VirtualRegister,
-                                      subtreeRoot->rightChild()->getValue());
-        M->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister,prod);
+        M = Create3OperandInstr(ChooseMulInstructionByType(
+                                   subtreeRoot->getInstruction()->getType()),
+                                quot, subtreeRoot->rightChild()->getValue(),
+                                prod);
         mvec.push_back(M);
         
         M = new MachineInstr(ChooseSubInstructionByType(
@@ -1794,24 +1723,40 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         break;
 
       case 138:        // bool:   And(bool, not)
-      case 438:        // bool:   BAnd(bool, not)
-        mvec.push_back(new MachineInstr(ANDN));
-        Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
+      case 438:        // bool:   BAnd(bool, bnot)
+      { // Use the argument of NOT as the second argument!
+        // Mark the NOT node so that no code is generated for it.
+        InstructionNode* notNode = (InstructionNode*) subtreeRoot->rightChild();
+        Value* notArg = BinaryOperator::getNotArgument(
+                           cast<BinaryOperator>(notNode->getInstruction()));
+        notNode->markFoldedIntoParent();
+        mvec.push_back(Create3OperandInstr(ANDN,
+                                           subtreeRoot->leftChild()->getValue(),
+                                           notArg, subtreeRoot->getValue()));
         break;
+      }
 
       case  39:        // bool:   Or(bool, bool)
       case 239:        // bool:   Or(bool, boolconst)
       case 339:        // reg :   BOr(reg, reg)
       case 539:        // reg :   BOr(reg, Constant)
-        mvec.push_back(new MachineInstr(ORN));
+        mvec.push_back(new MachineInstr(OR));
         Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
         break;
 
       case 139:        // bool:   Or(bool, not)
-      case 439:        // bool:   BOr(bool, not)
-        mvec.push_back(new MachineInstr(ORN));
-        Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
+      case 439:        // bool:   BOr(bool, bnot)
+      { // Use the argument of NOT as the second argument!
+        // Mark the NOT node so that no code is generated for it.
+        InstructionNode* notNode = (InstructionNode*) subtreeRoot->rightChild();
+        Value* notArg = BinaryOperator::getNotArgument(
+                           cast<BinaryOperator>(notNode->getInstruction()));
+        notNode->markFoldedIntoParent();
+        mvec.push_back(Create3OperandInstr(ORN,
+                                           subtreeRoot->leftChild()->getValue(),
+                                           notArg, subtreeRoot->getValue()));
         break;
+      }
 
       case  40:        // bool:   Xor(bool, bool)
       case 240:        // bool:   Xor(bool, boolconst)
@@ -1822,10 +1767,18 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         break;
 
       case 140:        // bool:   Xor(bool, not)
-      case 440:        // bool:   BXor(bool, not)
-        mvec.push_back(new MachineInstr(XNOR));
-        Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
+      case 440:        // bool:   BXor(bool, bnot)
+      { // Use the argument of NOT as the second argument!
+        // Mark the NOT node so that no code is generated for it.
+        InstructionNode* notNode = (InstructionNode*) subtreeRoot->rightChild();
+        Value* notArg = BinaryOperator::getNotArgument(
+                           cast<BinaryOperator>(notNode->getInstruction()));
+        notNode->markFoldedIntoParent();
+        mvec.push_back(Create3OperandInstr(XNOR,
+                                           subtreeRoot->leftChild()->getValue(),
+                                           notArg, subtreeRoot->getValue()));
         break;
+      }
 
       case 41: // boolconst:   SetCC(reg, Constant)
         // 
@@ -1839,7 +1792,8 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // a result register, and setting a condition code.
         // 
         // If the boolean result of the SetCC is used by anything other
-        // than a single branch instruction, the boolean must be
+        // than a branch instruction, or if it is used outside the current
+        // basic block, the boolean must be
         // computed and stored in the result register.  Otherwise, discard
         // the difference (by using %g0) and keep only the condition code.
         // 
@@ -1850,9 +1804,9 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // 
         InstructionNode* parentNode = (InstructionNode*) subtreeRoot->parent();
         Instruction* setCCInstr = subtreeRoot->getInstruction();
-        bool keepBoolVal = (parentNode == NULL ||
-                            parentNode->getInstruction()->getOpcode()
-                                != Instruction::Br);
+        
+        bool keepBoolVal = parentNode == NULL ||
+                           ! AllUsesAreBranches(setCCInstr);
         bool subValIsBoolVal = setCCInstr->getOpcode() == Instruction::SetNE;
         bool keepSubVal = keepBoolVal && subValIsBoolVal;
         bool computeBoolVal = keepBoolVal && ! subValIsBoolVal;
@@ -1860,7 +1814,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         bool mustClearReg;
         int valueToMove;
         MachineOpCode movOpCode = 0;
-
+        
         // Mark the 4th operand as being a CC register, and as a def
         // A TmpInstruction is created to represent the CC "result".
         // Unlike other instances of TmpInstruction, this one is used
@@ -1873,12 +1827,11 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // a FP condition code register.
         // 
         Value* leftVal = subtreeRoot->leftChild()->getValue();
-        bool isFPCompare = (leftVal->getType() == Type::FloatTy || 
-                            leftVal->getType() == Type::DoubleTy);
+        bool isFPCompare = leftVal->getType()->isFloatingPoint();
         
         TmpInstruction* tmpForCC = GetTmpForCC(setCCInstr,
                                      setCCInstr->getParent()->getParent(),
-                                     isFPCompare? Type::FloatTy : Type::IntTy);
+                                     isFPCompare ? Type::FloatTy : Type::IntTy);
         MachineCodeForInstruction::get(setCCInstr).addTemp(tmpForCC);
         
         if (! isFPCompare)
@@ -1933,29 +1886,26 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
               }
             
             // Now conditionally move `valueToMove' (0 or 1) into the register
+            // Mark the register as a use (as well as a def) because the old
+            // value should be retained if the condition is false.
             M = new MachineInstr(movOpCode);
             M->SetMachineOperandVal(0, MachineOperand::MO_CCRegister,
                                     tmpForCC);
             M->SetMachineOperandConst(1, MachineOperand::MO_UnextendedImmed,
                                       valueToMove);
             M->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister,
-                                    setCCInstr);
+                                    setCCInstr, /*isDef*/ true,
+                                    /*isDefAndUse*/ true);
             mvec.push_back(M);
           }
         break;
       }    
 
-      case 43: // boolreg: VReg
-      case 44: // boolreg: Constant
-        break;
-
       case 51: // reg:   Load(reg)
       case 52: // reg:   Load(ptrreg)
-      case 53: // reg:   LoadIdx(reg,reg)
-      case 54: // reg:   LoadIdx(ptrreg,reg)
         mvec.push_back(new MachineInstr(ChooseLoadInstruction(
                                      subtreeRoot->getValue()->getType())));
-        SetOperandsForMemInstr(mvec, mvec.end()-1, subtreeRoot, target);
+        SetOperandsForMemInstr(mvec, subtreeRoot, target);
         break;
 
       case 55: // reg:   GetElemPtr(reg)
@@ -1963,20 +1913,20 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         // If the GetElemPtr was folded into the user (parent), it will be
         // caught above.  For other cases, we have to compute the address.
         mvec.push_back(new MachineInstr(ADD));
-        SetOperandsForMemInstr(mvec, mvec.end()-1, subtreeRoot, target);
+        SetOperandsForMemInstr(mvec, subtreeRoot, target);
         break;
-        
+
       case 57: // reg:  Alloca: Implement as 1 instruction:
       {         //         add %fp, offsetFromFP -> result
         AllocationInst* instr =
           cast<AllocationInst>(subtreeRoot->getInstruction());
         unsigned int tsize =
-          target.findOptimalStorageSize(instr->getAllocatedType());
+          target.DataLayout.getTypeSize(instr->getAllocatedType());
         assert(tsize != 0);
         CreateCodeForFixedSizeAlloca(target, instr, tsize, 1, mvec);
         break;
       }
-      
+
       case 58: // reg:   Alloca(reg): Implement as 3 instructions:
                 //     mul num, typeSz -> tmp
                 //     sub %sp, tmp    -> %sp
@@ -1986,7 +1936,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         const Type* eltType = instr->getAllocatedType();
         
         // If #elements is constant, use simpler code for fixed-size allocas
-        int tsize = (int) target.findOptimalStorageSize(eltType);
+        int tsize = (int) target.DataLayout.getTypeSize(eltType);
         Value* numElementsVal = NULL;
         bool isArray = instr->isArrayAllocation();
         
@@ -2003,13 +1953,13 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
                                           numElementsVal, mvec);
         break;
       }
-      
+
       case 61: // reg:   Call
-      {         // Generate a direct (CALL) or indirect (JMPL). depending
-                // Mark the return-address register and the indirection
-                // register (if any) as hidden virtual registers.
-                // Also, mark the operands of the Call and return value (if
-                // any) as implicit operands of the CALL machine instruction.
+      {         // Generate a direct (CALL) or indirect (JMPL) call.
+                // Mark the return-address register, the indirection
+                // register (for indirect calls), the operands of the Call,
+                // and the return value (if any) as implicit operands
+                // of the machine instruction.
                 // 
                 // If this is a varargs function, floating point arguments
                 // have to be passed in integer registers so insert
@@ -2017,79 +1967,86 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
                 // 
         CallInst *callInstr = cast<CallInst>(subtreeRoot->getInstruction());
         Value *callee = callInstr->getCalledValue();
-        
-        // Create hidden virtual register for return address, with type void*. 
-        Instruction* retAddrReg =
+
+        // Create hidden virtual register for return address with type void*
+        TmpInstruction* retAddrReg =
           new TmpInstruction(PointerType::get(Type::VoidTy), callInstr);
         MachineCodeForInstruction::get(callInstr).addTemp(retAddrReg);
-        
+
         // Generate the machine instruction and its operands.
         // Use CALL for direct function calls; this optimistically assumes
         // the PC-relative address fits in the CALL address field (22 bits).
         // Use JMPL for indirect calls.
         // 
-        if (isa<Function>(callee))
-          { // direct function call
-            M = new MachineInstr(CALL);
-            M->SetMachineOperandVal(0, MachineOperand::MO_PCRelativeDisp,
-                                    callee);
-          } 
-        else
-          { // indirect function call
-            M = new MachineInstr(JMPLCALL);
-            M->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister,
-                                    callee);
-            M->SetMachineOperandConst(1, MachineOperand::MO_SignExtendedImmed,
-                                      (int64_t) 0);
-            M->SetMachineOperandVal(2, MachineOperand::MO_VirtualRegister,
-                                    retAddrReg);
-          }
-        
+        if (isa<Function>(callee))      // direct function call
+          M = Create1OperandInstr_Addr(CALL, callee);
+        else                            // indirect function call
+          M = Create3OperandInstr_SImmed(JMPLCALL, callee,
+                                         (int64_t) 0, retAddrReg);
         mvec.push_back(M);
 
-        // WARNING: Operands 0..N-1 must go in slots 0..N-1 of implicitUses.
-        //          The result value must go in slot N.  This is assumed
-        //          in register allocation.
-        // 
-        // Add the call operands and return value as implicit refs
-        // const Type* funcType = isa<Function>(callee)? callee->getType()
-        //   : cast<PointerType>(callee->getType())->getElementType();
-        const Type* funcType = callee->getType();
-        bool isVarArgs = cast<FunctionType>(cast<PointerType>(funcType)
-                                            ->getElementType())->isVarArg();
+        const FunctionType* funcType =
+          cast<FunctionType>(cast<PointerType>(callee->getType())
+                             ->getElementType());
+        bool isVarArgs = funcType->isVarArg();
+        bool noPrototype = isVarArgs && funcType->getNumParams() == 0;
         
-        for (unsigned i=0, N=callInstr->getNumOperands(); i < N; ++i)
-          if (callInstr->getOperand(i) != callee)
-            {
-              Value* argVal = callInstr->getOperand(i);
-              
-              // Check for FP arguments to varargs functions
-              if (isVarArgs && argVal->getType()->isFloatingPoint())
-                { // Add a copy-float-to-int instruction
-                  MachineCodeForInstruction &destMCFI = 
-                    MachineCodeForInstruction::get(callInstr);   
-                  Instruction* intArgReg =
-                    new TmpInstruction(Type::IntTy, argVal);
-                  destMCFI.addTemp(intArgReg);
-                  
-                  vector<MachineInstr*> minstrVec;
-                  vector<TmpInstruction*> tempVec;
-                  target.getInstrInfo().CreateCodeToCopyFloatToInt(
-                         callInstr->getParent()->getParent(),
-                         argVal, (TmpInstruction*) intArgReg,
-                         minstrVec, tempVec, target);
-                  
-                  mvec.insert(mvec.begin(), minstrVec.begin(),minstrVec.end());
-                  
-                  for (unsigned i=0; i < tempVec.size(); ++i)
-                    destMCFI.addTemp(tempVec[i]);
-                  
-                  argVal = intArgReg;
-                }
-              
-              mvec.back()->addImplicitRef(argVal);
-            }
+        // Use an annotation to pass information about call arguments
+        // to the register allocator.
+        CallArgsDescriptor* argDesc = new CallArgsDescriptor(callInstr,
+                                         retAddrReg, isVarArgs, noPrototype);
+        M->addAnnotation(argDesc);
+        
+        assert(callInstr->getOperand(0) == callee
+               && "This is assumed in the loop below!");
         
+        for (unsigned i=1, N=callInstr->getNumOperands(); i < N; ++i)
+          {
+            Value* argVal = callInstr->getOperand(i);
+            Instruction* intArgReg = NULL;
+            
+            // Check for FP arguments to varargs functions.
+            // Any such argument in the first $K$ args must be passed in an
+            // integer register, where K = #integer argument registers.
+            if (isVarArgs && argVal->getType()->isFloatingPoint())
+              {
+                // If it is a function with no prototype, pass value
+                // as an FP value as well as a varargs value
+                if (noPrototype)
+                  argDesc->getArgInfo(i-1).setUseFPArgReg();
+                
+                // If this arg. is in the first $K$ regs, add a copy
+                // float-to-int instruction to pass the value as an integer.
+                if (i <= target.getRegInfo().GetNumOfIntArgRegs())
+                  {
+                    MachineCodeForInstruction &destMCFI = 
+                      MachineCodeForInstruction::get(callInstr);   
+                    intArgReg = new TmpInstruction(Type::IntTy, argVal);
+                    destMCFI.addTemp(intArgReg);
+                    
+                    vector<MachineInstr*> copyMvec;
+                    target.getInstrInfo().CreateCodeToCopyFloatToInt(target,
+                                           callInstr->getParent()->getParent(),
+                                           argVal, (TmpInstruction*) intArgReg,
+                                           copyMvec, destMCFI);
+                    mvec.insert(mvec.begin(),copyMvec.begin(),copyMvec.end());
+                    
+                    argDesc->getArgInfo(i-1).setUseIntArgReg();
+                    argDesc->getArgInfo(i-1).setArgCopy(intArgReg);
+                  }
+                else
+                  // Cannot fit in first $K$ regs so pass the arg on the stack
+                  argDesc->getArgInfo(i-1).setUseStackSlot();
+              }
+            
+            if (intArgReg)
+              mvec.back()->addImplicitRef(intArgReg);
+            
+            mvec.back()->addImplicitRef(argVal);
+          }
+        
+        // Add the return value as an implicit ref.  The call operands
+        // were added above.
         if (callInstr->getType() != Type::VoidTy)
           mvec.back()->addImplicitRef(callInstr, /*isDef*/ true);
         
@@ -2101,22 +2058,28 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         mvec.push_back(new MachineInstr(NOP));
         break;
       }
-
+      
       case 62: // reg:   Shl(reg, reg)
-      { const Type* opType = subtreeRoot->leftChild()->getValue()->getType();
-        assert(opType->isIntegral()
-               || opType == Type::BoolTy
-               || opType->isPointerType()&& "Shl unsupported for other types");
-        mvec.push_back(new MachineInstr((opType == Type::LongTy)? SLLX : SLL));
-        Set3OperandsFromInstr(mvec.back(), subtreeRoot, target);
+      {
+        Value* argVal1 = subtreeRoot->leftChild()->getValue();
+        Value* argVal2 = subtreeRoot->rightChild()->getValue();
+        Instruction* shlInstr = subtreeRoot->getInstruction();
+        
+        const Type* opType = argVal1->getType();
+        assert((opType->isInteger() || isa<PointerType>(opType)) &&
+               "Shl unsupported for other types");
+        
+        CreateShiftInstructions(target, shlInstr->getParent()->getParent(),
+                                (opType == Type::LongTy)? SLLX : SLL,
+                                argVal1, argVal2, 0, shlInstr, mvec,
+                                MachineCodeForInstruction::get(shlInstr));
         break;
       }
       
       case 63: // reg:   Shr(reg, reg)
       { const Type* opType = subtreeRoot->leftChild()->getValue()->getType();
-        assert(opType->isIntegral()
-               || opType == Type::BoolTy
-               || opType->isPointerType() &&"Shr unsupported for other types");
+        assert((opType->isInteger() || isa<PointerType>(opType)) &&
+               "Shr unsupported for other types");
         mvec.push_back(new MachineInstr((opType->isSigned()
                                    ? ((opType == Type::LongTy)? SRAX : SRA)
                                    : ((opType == Type::LongTy)? SRLX : SRL))));
@@ -2127,22 +2090,6 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
       case 64: // reg:   Phi(reg,reg)
         break;                          // don't forward the value
 
-#undef NEED_PHI_MACHINE_INSTRS
-#ifdef NEED_PHI_MACHINE_INSTRS
-      {                // This instruction has variable #operands, so resultPos is 0.
-        Instruction* phi = subtreeRoot->getInstruction();
-        M = new MachineInstr(PHI, 1 + phi->getNumOperands());
-        M->SetMachineOperandVal(0, MachineOperand::MO_VirtualRegister,
-                                      subtreeRoot->getValue());
-        for (unsigned i=0, N=phi->getNumOperands(); i < N; i++)
-          M->SetMachineOperandVal(i+1, MachineOperand::MO_VirtualRegister,
-                                  phi->getOperand(i));
-        mvec.push_back(M);
-        break;
-      }  
-#endif // NEED_PHI_MACHINE_INSTRS
-      
-      
       case 71: // reg:     VReg
       case 72: // reg:     Constant
         break;                          // don't forward the value
@@ -2152,7 +2099,7 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
         break;
       }
     }
-  
+
   if (forwardOperandNum >= 0)
     { // We did not generate a machine instruction but need to use operand.
       // If user is in the same tree, replace Value in its machine operand.
@@ -2163,14 +2110,43 @@ GetInstructionsByRule(InstructionNode* subtreeRoot,
       else
         {
           vector<MachineInstr*> minstrVec;
-          target.getInstrInfo().CreateCopyInstructionsByType(target, 
-                subtreeRoot->getInstruction()->getParent()->getParent(),
-                subtreeRoot->getInstruction()->getOperand(forwardOperandNum),
-                subtreeRoot->getInstruction(), minstrVec);
+          Instruction* instr = subtreeRoot->getInstruction();
+          target.getInstrInfo().
+            CreateCopyInstructionsByType(target,
+                                         instr->getParent()->getParent(),
+                                         instr->getOperand(forwardOperandNum),
+                                         instr, minstrVec,
+                                        MachineCodeForInstruction::get(instr));
           assert(minstrVec.size() > 0);
           mvec.insert(mvec.end(), minstrVec.begin(), minstrVec.end());
         }
     }
-}
-
 
+  if (maskUnsignedResult)
+    { // If result is unsigned and smaller than int reg size,
+      // we need to clear high bits of result value.
+      assert(forwardOperandNum < 0 && "Need mask but no instruction generated");
+      Instruction* dest = subtreeRoot->getInstruction();
+      if (dest->getType()->isUnsigned())
+        {
+          unsigned destSize = target.DataLayout.getTypeSize(dest->getType());
+          if (destSize <= 4)
+            { // Mask high bits.  Use a TmpInstruction to represent the
+              // intermediate result before masking.  Since those instructions
+              // have already been generated, go back and substitute tmpI
+              // for dest in the result position of each one of them.
+              TmpInstruction *tmpI = new TmpInstruction(dest->getType(), dest,
+                                                        NULL, "maskHi");
+              MachineCodeForInstruction::get(dest).addTemp(tmpI);
+
+              for (unsigned i=0, N=mvec.size(); i < N; ++i)
+                mvec[i]->substituteValue(dest, tmpI);
+
+              M = Create3OperandInstr_UImmed(SRL, tmpI, 8*(4-destSize), dest);
+              mvec.push_back(M);
+            }
+          else if (destSize < target.DataLayout.getIntegerRegize())
+            assert(0 && "Unsupported type size: 32 < size < 64 bits");
+        }
+    }
+}