From cf21d155ed16c6eb9a2c0a5fa2e7f6a19e488476 Mon Sep 17 00:00:00 2001
From: Elena Demikhovsky <elena.demikhovsky@intel.com>
Date: Mon, 5 Aug 2013 12:17:06 +0000
Subject: [PATCH] LLVM Interpreter: This patch implements vector support for
 cast operations (zext, sext, uitofp, sitofp, trunc, fpext, fptosi, fptrunc,
 bitcast) and shift operations (shl, ashr, lshr) for integer and floating
 point data types. Added tests.

Done by Yuri Veselov (mailto:Yuri.Veselov@intel.com).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187724 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/ExecutionEngine/Interpreter/Execution.cpp | 491 +++++++++++++++---
 test/ExecutionEngine/test-interp-vec-cast.ll  | 146 ++++++
 test/ExecutionEngine/test-interp-vec-shift.ll |  32 ++
 3 files changed, 592 insertions(+), 77 deletions(-)
 create mode 100644 test/ExecutionEngine/test-interp-vec-cast.ll
 create mode 100644 test/ExecutionEngine/test-interp-vec-shift.ll

diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index b95a9e867c1..fc3d579d971 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1138,16 +1138,42 @@ void Interpreter::visitCallSite(CallSite CS) {
   callFunction((Function*)GVTOP(SRC), ArgVals);
 }
 
+// auxilary function for shift operations
+static unsigned getShiftAmount(uint64_t orgShiftAmount,
+                               llvm::APInt valueToShift) {
+  unsigned valueWidth = valueToShift.getBitWidth();
+  if (orgShiftAmount < (uint64_t)valueWidth)
+    return orgShiftAmount;
+  // according to the llvm documentation, if orgShiftAmount > valueWidth,
+  // the result is undfeined. but we do shift by this rule:
+  return (NextPowerOf2(valueWidth-1) - 1) & orgShiftAmount;
+}
+
+
 void Interpreter::visitShl(BinaryOperator &I) {
   ExecutionContext &SF = ECStack.back();
   GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
   GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
   GenericValue Dest;
-  if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
-    Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue());
-  else
-    Dest.IntVal = Src1.IntVal;
-  
+  const Type *Ty = I.getType();
+
+  if (Ty->isVectorTy()) {
+    uint32_t src1Size = uint32_t(Src1.AggregateVal.size());
+    assert(src1Size == Src2.AggregateVal.size());
+    for (unsigned i = 0; i < src1Size; i++) {
+      GenericValue Result;
+      uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue();
+      llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal;
+      Result.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift));
+      Dest.AggregateVal.push_back(Result);
+    }
+  } else {
+    // scalar
+    uint64_t shiftAmount = Src2.IntVal.getZExtValue();
+    llvm::APInt valueToShift = Src1.IntVal;
+    Dest.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift));
+  }
+
   SetValue(&I, Dest, SF);
 }
 
@@ -1156,11 +1182,25 @@ void Interpreter::visitLShr(BinaryOperator &I) {
   GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
   GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
   GenericValue Dest;
-  if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
-    Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue());
-  else
-    Dest.IntVal = Src1.IntVal;
-  
+  const Type *Ty = I.getType();
+
+  if (Ty->isVectorTy()) {
+    uint32_t src1Size = uint32_t(Src1.AggregateVal.size());
+    assert(src1Size == Src2.AggregateVal.size());
+    for (unsigned i = 0; i < src1Size; i++) {
+      GenericValue Result;
+      uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue();
+      llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal;
+      Result.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift));
+      Dest.AggregateVal.push_back(Result);
+    }
+  } else {
+    // scalar
+    uint64_t shiftAmount = Src2.IntVal.getZExtValue();
+    llvm::APInt valueToShift = Src1.IntVal;
+    Dest.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift));
+  }
+
   SetValue(&I, Dest, SF);
 }
 
@@ -1169,110 +1209,273 @@ void Interpreter::visitAShr(BinaryOperator &I) {
   GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
   GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
   GenericValue Dest;
-  if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
-    Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue());
-  else
-    Dest.IntVal = Src1.IntVal;
-  
+  const Type *Ty = I.getType();
+
+  if (Ty->isVectorTy()) {
+    size_t src1Size = Src1.AggregateVal.size();
+    assert(src1Size == Src2.AggregateVal.size());
+    for (unsigned i = 0; i < src1Size; i++) {
+      GenericValue Result;
+      uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue();
+      llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal;
+      Result.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift));
+      Dest.AggregateVal.push_back(Result);
+    }
+  } else {
+    // scalar
+    uint64_t shiftAmount = Src2.IntVal.getZExtValue();
+    llvm::APInt valueToShift = Src1.IntVal;
+    Dest.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift));
+  }
+
   SetValue(&I, Dest, SF);
 }
 
 GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy,
                                            ExecutionContext &SF) {
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  IntegerType *DITy = cast<IntegerType>(DstTy);
-  unsigned DBitWidth = DITy->getBitWidth();
-  Dest.IntVal = Src.IntVal.trunc(DBitWidth);
+  Type *SrcTy = SrcVal->getType();
+  if (SrcTy->isVectorTy()) {
+    Type *DstVecTy = DstTy->getScalarType();
+    unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned NumElts = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(NumElts);
+    for (unsigned i = 0; i < NumElts; i++)
+      Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.trunc(DBitWidth);
+  } else {
+    IntegerType *DITy = cast<IntegerType>(DstTy);
+    unsigned DBitWidth = DITy->getBitWidth();
+    Dest.IntVal = Src.IntVal.trunc(DBitWidth);
+  }
   return Dest;
 }
 
 GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy,
                                           ExecutionContext &SF) {
+  const Type *SrcTy = SrcVal->getType();
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  IntegerType *DITy = cast<IntegerType>(DstTy);
-  unsigned DBitWidth = DITy->getBitWidth();
-  Dest.IntVal = Src.IntVal.sext(DBitWidth);
+  if (SrcTy->isVectorTy()) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.sext(DBitWidth);
+  } else {
+    const IntegerType *DITy = cast<IntegerType>(DstTy);
+    unsigned DBitWidth = DITy->getBitWidth();
+    Dest.IntVal = Src.IntVal.sext(DBitWidth);
+  }
   return Dest;
 }
 
 GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy,
                                           ExecutionContext &SF) {
+  const Type *SrcTy = SrcVal->getType();
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  IntegerType *DITy = cast<IntegerType>(DstTy);
-  unsigned DBitWidth = DITy->getBitWidth();
-  Dest.IntVal = Src.IntVal.zext(DBitWidth);
+  if (SrcTy->isVectorTy()) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.zext(DBitWidth);
+  } else {
+    const IntegerType *DITy = cast<IntegerType>(DstTy);
+    unsigned DBitWidth = DITy->getBitWidth();
+    Dest.IntVal = Src.IntVal.zext(DBitWidth);
+  }
   return Dest;
 }
 
 GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, Type *DstTy,
                                              ExecutionContext &SF) {
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
-         "Invalid FPTrunc instruction");
-  Dest.FloatVal = (float) Src.DoubleVal;
+
+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    assert(SrcVal->getType()->getScalarType()->isDoubleTy() &&
+           DstTy->getScalarType()->isFloatTy() &&
+           "Invalid FPTrunc instruction");
+
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].FloatVal = (float)Src.AggregateVal[i].DoubleVal;
+  } else {
+    assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
+           "Invalid FPTrunc instruction");
+    Dest.FloatVal = (float)Src.DoubleVal;
+  }
+
   return Dest;
 }
 
 GenericValue Interpreter::executeFPExtInst(Value *SrcVal, Type *DstTy,
                                            ExecutionContext &SF) {
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
-         "Invalid FPTrunc instruction");
-  Dest.DoubleVal = (double) Src.FloatVal;
+
+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    assert(SrcVal->getType()->getScalarType()->isFloatTy() &&
+           DstTy->getScalarType()->isDoubleTy() && "Invalid FPExt instruction");
+
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].DoubleVal = (double)Src.AggregateVal[i].FloatVal;
+  } else {
+    assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
+           "Invalid FPExt instruction");
+    Dest.DoubleVal = (double)Src.FloatVal;
+  }
+
   return Dest;
 }
 
 GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy,
                                             ExecutionContext &SF) {
   Type *SrcTy = SrcVal->getType();
-  uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");
 
-  if (SrcTy->getTypeID() == Type::FloatTyID)
-    Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
-  else
-    Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+  if (SrcTy->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    const Type *SrcVecTy = SrcTy->getScalarType();
+    uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+
+    if (SrcVecTy->getTypeID() == Type::FloatTyID) {
+      assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToUI instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt(
+            Src.AggregateVal[i].FloatVal, DBitWidth);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt(
+            Src.AggregateVal[i].DoubleVal, DBitWidth);
+    }
+  } else {
+    // scalar
+    uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+    assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");
+
+    if (SrcTy->getTypeID() == Type::FloatTyID)
+      Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+    else {
+      Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+    }
+  }
+
   return Dest;
 }
 
 GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy,
                                             ExecutionContext &SF) {
   Type *SrcTy = SrcVal->getType();
-  uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");
 
-  if (SrcTy->getTypeID() == Type::FloatTyID)
-    Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
-  else
-    Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+  if (SrcTy->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    const Type *SrcVecTy = SrcTy->getScalarType();
+    uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(size);
+
+    if (SrcVecTy->getTypeID() == Type::FloatTyID) {
+      assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToSI instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt(
+            Src.AggregateVal[i].FloatVal, DBitWidth);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt(
+            Src.AggregateVal[i].DoubleVal, DBitWidth);
+    }
+  } else {
+    // scalar
+    unsigned DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
+    assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");
+
+    if (SrcTy->getTypeID() == Type::FloatTyID)
+      Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
+    else {
+      Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+    }
+  }
   return Dest;
 }
 
 GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy,
                                             ExecutionContext &SF) {
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
 
-  if (DstTy->getTypeID() == Type::FloatTyID)
-    Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
-  else
-    Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal);
+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(size);
+
+    if (DstVecTy->getTypeID() == Type::FloatTyID) {
+      assert(DstVecTy->isFloatingPointTy() && "Invalid UIToFP instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].FloatVal =
+            APIntOps::RoundAPIntToFloat(Src.AggregateVal[i].IntVal);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].DoubleVal =
+            APIntOps::RoundAPIntToDouble(Src.AggregateVal[i].IntVal);
+    }
+  } else {
+    // scalar
+    assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
+    if (DstTy->getTypeID() == Type::FloatTyID)
+      Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
+    else {
+      Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal);
+    }
+  }
   return Dest;
 }
 
 GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy,
                                             ExecutionContext &SF) {
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");
 
-  if (DstTy->getTypeID() == Type::FloatTyID)
-    Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
-  else
-    Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal);
-  return Dest;
+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(size);
+
+    if (DstVecTy->getTypeID() == Type::FloatTyID) {
+      assert(DstVecTy->isFloatingPointTy() && "Invalid SIToFP instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].FloatVal =
+            APIntOps::RoundSignedAPIntToFloat(Src.AggregateVal[i].IntVal);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].DoubleVal =
+            APIntOps::RoundSignedAPIntToDouble(Src.AggregateVal[i].IntVal);
+    }
+  } else {
+    // scalar
+    assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");
+
+    if (DstTy->getTypeID() == Type::FloatTyID)
+      Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
+    else {
+      Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal);
+    }
+  }
 
+  return Dest;
 }
 
 GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, Type *DstTy,
@@ -1300,33 +1503,167 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
 
 GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy,
                                              ExecutionContext &SF) {
-  
+
+  // This instruction supports bitwise conversion of vectors to integers and
+  // to vectors of other types (as long as they have the same size)
   Type *SrcTy = SrcVal->getType();
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  if (DstTy->isPointerTy()) {
-    assert(SrcTy->isPointerTy() && "Invalid BitCast");
-    Dest.PointerVal = Src.PointerVal;
-  } else if (DstTy->isIntegerTy()) {
-    if (SrcTy->isFloatTy()) {
-      Dest.IntVal = APInt::floatToBits(Src.FloatVal);
-    } else if (SrcTy->isDoubleTy()) {
-      Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
-    } else if (SrcTy->isIntegerTy()) {
-      Dest.IntVal = Src.IntVal;
-    } else 
+
+  if ((SrcTy->getTypeID() == Type::VectorTyID) ||
+      (DstTy->getTypeID() == Type::VectorTyID)) {
+    // vector src bitcast to vector dst or vector src bitcast to scalar dst or
+    // scalar src bitcast to vector dst
+    bool isLittleEndian = TD.isLittleEndian();
+    GenericValue TempDst, TempSrc, SrcVec;
+    const Type *SrcElemTy;
+    const Type *DstElemTy;
+    unsigned SrcBitSize;
+    unsigned DstBitSize;
+    unsigned SrcNum;
+    unsigned DstNum;
+
+    if (SrcTy->getTypeID() == Type::VectorTyID) {
+      SrcElemTy = SrcTy->getScalarType();
+      SrcBitSize = SrcTy->getScalarSizeInBits();
+      SrcNum = Src.AggregateVal.size();
+      SrcVec = Src;
+    } else {
+      // if src is scalar value, make it vector <1 x type>
+      SrcElemTy = SrcTy;
+      SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+      SrcNum = 1;
+      SrcVec.AggregateVal.push_back(Src);
+    }
+
+    if (DstTy->getTypeID() == Type::VectorTyID) {
+      DstElemTy = DstTy->getScalarType();
+      DstBitSize = DstTy->getScalarSizeInBits();
+      DstNum = (SrcNum * SrcBitSize) / DstBitSize;
+    } else {
+      DstElemTy = DstTy;
+      DstBitSize = DstTy->getPrimitiveSizeInBits();
+      DstNum = 1;
+    }
+
+    if (SrcNum * SrcBitSize != DstNum * DstBitSize)
       llvm_unreachable("Invalid BitCast");
-  } else if (DstTy->isFloatTy()) {
-    if (SrcTy->isIntegerTy())
-      Dest.FloatVal = Src.IntVal.bitsToFloat();
-    else
-      Dest.FloatVal = Src.FloatVal;
-  } else if (DstTy->isDoubleTy()) {
-    if (SrcTy->isIntegerTy())
-      Dest.DoubleVal = Src.IntVal.bitsToDouble();
-    else
-      Dest.DoubleVal = Src.DoubleVal;
-  } else
-    llvm_unreachable("Invalid Bitcast");
+
+    // If src is floating point, cast to integer first.
+    TempSrc.AggregateVal.resize(SrcNum);
+    if (SrcElemTy->isFloatTy()) {
+      for (unsigned i = 0; i < SrcNum; i++)
+        TempSrc.AggregateVal[i].IntVal =
+            APInt::floatToBits(SrcVec.AggregateVal[i].FloatVal);
+
+    } else if (SrcElemTy->isDoubleTy()) {
+      for (unsigned i = 0; i < SrcNum; i++)
+        TempSrc.AggregateVal[i].IntVal =
+            APInt::doubleToBits(SrcVec.AggregateVal[i].DoubleVal);
+    } else if (SrcElemTy->isIntegerTy()) {
+      for (unsigned i = 0; i < SrcNum; i++)
+        TempSrc.AggregateVal[i].IntVal = SrcVec.AggregateVal[i].IntVal;
+    } else {
+      // Pointers are not allowed as the element type of vector.
+      llvm_unreachable("Invalid Bitcast");
+    }
+
+    // now TempSrc is integer type vector
+    if (DstNum < SrcNum) {
+      // Example: bitcast <4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>
+      unsigned Ratio = SrcNum / DstNum;
+      unsigned SrcElt = 0;
+      for (unsigned i = 0; i < DstNum; i++) {
+        GenericValue Elt;
+        Elt.IntVal = 0;
+        Elt.IntVal = Elt.IntVal.zext(DstBitSize);
+        unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize * (Ratio - 1);
+        for (unsigned j = 0; j < Ratio; j++) {
+          APInt Tmp;
+          Tmp = Tmp.zext(SrcBitSize);
+          Tmp = TempSrc.AggregateVal[SrcElt++].IntVal;
+          Tmp = Tmp.zext(DstBitSize);
+          Tmp = Tmp.shl(ShiftAmt);
+          ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+          Elt.IntVal |= Tmp;
+        }
+        TempDst.AggregateVal.push_back(Elt);
+      }
+    } else {
+      // Example: bitcast <2 x i64> <i64 0, i64 1> to <4 x i32>
+      unsigned Ratio = DstNum / SrcNum;
+      for (unsigned i = 0; i < SrcNum; i++) {
+        unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize * (Ratio - 1);
+        for (unsigned j = 0; j < Ratio; j++) {
+          GenericValue Elt;
+          Elt.IntVal = Elt.IntVal.zext(SrcBitSize);
+          Elt.IntVal = TempSrc.AggregateVal[i].IntVal;
+          Elt.IntVal = Elt.IntVal.lshr(ShiftAmt);
+          // it could be DstBitSize == SrcBitSize, so check it
+          if (DstBitSize < SrcBitSize)
+            Elt.IntVal = Elt.IntVal.trunc(DstBitSize);
+          ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+          TempDst.AggregateVal.push_back(Elt);
+        }
+      }
+    }
+
+    // convert result from integer to specified type
+    if (DstTy->getTypeID() == Type::VectorTyID) {
+      if (DstElemTy->isDoubleTy()) {
+        Dest.AggregateVal.resize(DstNum);
+        for (unsigned i = 0; i < DstNum; i++)
+          Dest.AggregateVal[i].DoubleVal =
+              TempDst.AggregateVal[i].IntVal.bitsToDouble();
+      } else if (DstElemTy->isFloatTy()) {
+        Dest.AggregateVal.resize(DstNum);
+        for (unsigned i = 0; i < DstNum; i++)
+          Dest.AggregateVal[i].FloatVal =
+              TempDst.AggregateVal[i].IntVal.bitsToFloat();
+      } else {
+        Dest = TempDst;
+      }
+    } else {
+      if (DstElemTy->isDoubleTy())
+        Dest.DoubleVal = TempDst.AggregateVal[0].IntVal.bitsToDouble();
+      else if (DstElemTy->isFloatTy()) {
+        Dest.FloatVal = TempDst.AggregateVal[0].IntVal.bitsToFloat();
+      } else {
+        Dest.IntVal = TempDst.AggregateVal[0].IntVal;
+      }
+    }
+  } else { //  if ((SrcTy->getTypeID() == Type::VectorTyID) ||
+           //     (DstTy->getTypeID() == Type::VectorTyID))
+
+    // scalar src bitcast to scalar dst
+    if (DstTy->isPointerTy()) {
+      assert(SrcTy->isPointerTy() && "Invalid BitCast");
+      Dest.PointerVal = Src.PointerVal;
+    } else if (DstTy->isIntegerTy()) {
+      if (SrcTy->isFloatTy())
+        Dest.IntVal = APInt::floatToBits(Src.FloatVal);
+      else if (SrcTy->isDoubleTy()) {
+        Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
+      } else if (SrcTy->isIntegerTy()) {
+        Dest.IntVal = Src.IntVal;
+      } else {
+        llvm_unreachable("Invalid BitCast");
+      }
+    } else if (DstTy->isFloatTy()) {
+      if (SrcTy->isIntegerTy())
+        Dest.FloatVal = Src.IntVal.bitsToFloat();
+      else {
+        Dest.FloatVal = Src.FloatVal;
+      }
+    } else if (DstTy->isDoubleTy()) {
+      if (SrcTy->isIntegerTy())
+        Dest.DoubleVal = Src.IntVal.bitsToDouble();
+      else {
+        Dest.DoubleVal = Src.DoubleVal;
+      }
+    } else {
+      llvm_unreachable("Invalid Bitcast");
+    }
+  }
 
   return Dest;
 }
diff --git a/test/ExecutionEngine/test-interp-vec-cast.ll b/test/ExecutionEngine/test-interp-vec-cast.ll
new file mode 100644
index 00000000000..3f9f66640fa
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-cast.ll
@@ -0,0 +1,146 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+    zext <2 x i1> <i1 true,i1 true> to <2 x i8>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i8>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i16>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i16>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i32>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i32>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i64>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i64>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i16>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i16>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i32>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i32>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i64>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i64>
+    zext <3 x i16> <i16 4, i16 4, i16 4> to <3 x i32>
+    zext <2 x i16> <i16 -4, i16 -4> to <2 x i32>
+    zext <3 x i16> <i16 4, i16 4, i16 4> to <3 x i64>
+    zext <2 x i16> <i16 -4, i16 -4> to <2 x i64>
+    zext <3 x i32> <i32 4, i32 4, i32 4> to <3 x i64>
+    zext <2 x i32> <i32 -4, i32 -4> to <2 x i64>
+
+
+    sext <2 x i1> <i1 true,i1 true> to <2 x i8>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i8>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i16>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i16>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i32>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i32>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i64>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i64>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i16>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i16>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i32>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i32>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i64>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i64>
+    sext <3 x i16> <i16 -4, i16 0, i16 4> to <3 x i32>
+    sext <2 x i16> <i16 -4, i16 4> to <2 x i32>
+    sext <3 x i16> <i16 -4, i16 0, i16 4> to <3 x i64>
+    sext <2 x i16> <i16 -4, i16 4> to <2 x i64>
+    sext <3 x i32> <i32 -4, i32 0, i32 4> to <3 x i64>
+    sext <2 x i32> <i32 -4, i32 4> to <2 x i64>
+
+
+    uitofp <3 x i1> <i1 true,i1 false,i1 true> to <3 x float>
+    uitofp <2 x i1> <i1 true,i1 true> to <2 x double>
+    uitofp <3 x i8> <i8 -4,i8 0,i8 4> to <3 x float>
+    uitofp <2 x i8> <i8 -4,i8 4> to <2 x double>
+    uitofp <3 x i16> <i16 -4,i16 0,i16 4> to <3 x float>
+    uitofp <2 x i16> <i16 -4,i16 4> to <2 x double>
+    uitofp <3 x i32> <i32 -4,i32 0,i32 4> to <3 x float>
+    uitofp <2 x i32> <i32 -4,i32 4> to <2 x double>
+    uitofp <3 x i64> <i64 -4,i64 0,i64 4> to <3 x float>
+    uitofp <2 x i64> <i64 -4,i64 4> to <2 x double>
+
+
+    sitofp <3 x i1> <i1 true,i1 false,i1 true> to <3 x float>
+    sitofp <2 x i1> <i1 true,i1 true> to <2 x double>
+    sitofp <3 x i8> <i8 -4,i8 0,i8 4> to <3 x float>
+    sitofp <2 x i8> <i8 -4,i8 4> to <2 x double>
+    sitofp <3 x i16> <i16 -4,i16 0,i16 4> to <3 x float>
+    sitofp <2 x i16> <i16 -4,i16 4> to <2 x double>
+    sitofp <3 x i32> <i32 -4,i32 0,i32 4> to <3 x float>
+    sitofp <2 x i32> <i32 -4,i32 4> to <2 x double>
+    sitofp <3 x i64> <i64 -4,i64 0,i64 4> to <3 x float>
+    sitofp <2 x i64> <i64 -4,i64 4> to <2 x double>
+
+    trunc <2 x i16> <i16 -6, i16 6> to <2 x i8>
+    trunc <3 x i16> <i16 -6, i16 6, i16 0> to <3 x i8>
+    trunc <2 x i32> <i32 -6, i32 6> to <2 x i8>
+    trunc <3 x i32> <i32 -6, i32 6, i32 0> to <3 x i8>
+    trunc <2 x i32> <i32 -6, i32 6> to <2 x i16>
+    trunc <3 x i32> <i32 -6, i32 6, i32 0> to <3 x i16>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i8>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i8>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i16>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i16>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i32>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i32>
+
+
+    fpext <2 x float>  < float 0.000000e+00, float 1.0> to <2 x double>
+    fpext <3 x float>  < float 0.000000e+00, float -1.0, float 1.0> to <3 x double>
+
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i8>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i8>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i16>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i16>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i32>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i32>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i64>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i64>
+
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i8>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i8>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i16>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i16>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i32>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i32>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i64>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i64>
+
+    fptrunc <2 x double> < double 0.000000e+00, double 1.0> to <2 x float>
+    fptrunc <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x float>
+
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <4 x i16>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <2 x i32>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to i64
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <2 x float>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to double
+
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <8 x i8>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <2 x i32>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to i64
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <2 x float>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to double
+
+    bitcast <2 x i32> <i32 1, i32 -1> to <8 x i8>
+    bitcast <2 x i32> <i32 1, i32 -1> to <4 x i16>
+    bitcast <2 x i32> <i32 1, i32 -1> to i64
+    bitcast <2 x i32> <i32 1, i32 -1> to <2 x float>
+    bitcast <2 x i32> <i32 1, i32 -1> to double
+
+    bitcast i64 1 to <8 x i8>
+    bitcast i64 1 to <4 x i16>
+    bitcast i64 1 to <2 x i32>
+    bitcast i64 1 to <2 x float>
+    bitcast i64 1 to double
+
+    bitcast <2 x float> <float 1.0, float -1.0> to <8 x i8>
+    bitcast <2 x float> <float 1.0, float -1.0> to <4 x i16>
+    bitcast <2 x float> <float 1.0, float -1.0> to i64
+    bitcast <2 x float> <float 1.0, float -1.0> to <2 x i32>
+    bitcast <2 x float> <float 1.0, float -1.0> to double
+
+    bitcast double 1.0 to <8 x i8>
+    bitcast double 1.0 to <4 x i16>
+    bitcast double 1.0 to <2 x i32>
+    bitcast double 1.0 to <2 x float>
+    bitcast double 1.0 to i64
+
+    ret i32 0
+}
diff --git a/test/ExecutionEngine/test-interp-vec-shift.ll b/test/ExecutionEngine/test-interp-vec-shift.ll
new file mode 100644
index 00000000000..3aa4f4e54f3
--- /dev/null
+++ b/test/ExecutionEngine/test-interp-vec-shift.ll
@@ -0,0 +1,32 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+    %shamt = add <2 x i8> <i8 0, i8 0>, <i8 1, i8 2>
+    %shift.upgrd.1 = zext <2 x i8> %shamt to <2 x i32>
+    %t1.s = shl <2 x i32> <i32 1, i32 2>, %shift.upgrd.1
+    %t2.s = shl <2 x i32> <i32 1, i32 2>, <i32 3, i32 4>
+    %shift.upgrd.2 = zext <2 x i8> %shamt to <2 x i32>
+    %t1 = shl <2 x i32> <i32 1, i32 2>, %shift.upgrd.2
+    %t2 = shl <2 x i32> <i32 1, i32 0>, <i32 5, i32 6>
+    %t2.s.upgrd.3 = shl <2 x i64> <i64 1, i64 2>, <i64 3, i64 4>
+    %t2.upgrd.4 = shl <2 x i64> <i64 1, i64 2>, <i64 6, i64 7>
+    %shift.upgrd.5 = zext <2 x i8> %shamt to <2 x i32>
+    %tr1.s = ashr <2 x i32> <i32 1, i32 2>, %shift.upgrd.5
+    %tr2.s = ashr <2 x i32> <i32 1, i32 2>, <i32 4, i32 5>
+    %shift.upgrd.6 = zext <2 x i8> %shamt to <2 x i32>
+    %tr1 = lshr <2 x i32> <i32 1, i32 2>, %shift.upgrd.6
+    %tr2 = lshr <2 x i32> <i32 1, i32 2>, <i32 5, i32 6>
+    %tr1.l = ashr <2 x i64> <i64 1, i64 2>, <i64 4, i64 5>
+    %shift.upgrd.7 = zext <2 x i8> %shamt to <2 x i64>
+    %tr2.l = ashr <2 x i64> <i64 1, i64 2>, %shift.upgrd.7
+    %tr3.l = shl <2 x i64> <i64 1, i64 2>, <i64 4, i64 5>
+    %shift.upgrd.8 = zext <2 x i8> %shamt to <2 x i64>
+    %tr4.l = shl <2 x i64> <i64 1, i64 2>, %shift.upgrd.8
+    %tr1.u = lshr <2 x i64> <i64 1, i64 2>, <i64 5, i64 6>
+    %shift.upgrd.9 = zext <2 x i8> %shamt to <2 x i64>
+    %tr2.u = lshr <2 x i64> <i64 1, i64 2>, %shift.upgrd.9
+    %tr3.u = shl <2 x i64> <i64 1, i64 2>, <i64 5, i64 6>
+    %shift.upgrd.10 = zext <2 x i8> %shamt to <2 x i64>
+    %tr4.u = shl <2 x i64> <i64 1, i64 2>, %shift.upgrd.10
+    ret i32 0
+}
-- 
2.34.1