[AArch64] Fix a bug generating incorrect instruction when building small vector.

author Kevin Qin <Kevin.Qin@arm.com>

Thu, 24 Jul 2014 02:05:42 +0000 (02:05 +0000)

committer Kevin Qin <Kevin.Qin@arm.com>

Thu, 24 Jul 2014 02:05:42 +0000 (02:05 +0000)
author Kevin Qin <Kevin.Qin@arm.com>
Thu, 24 Jul 2014 02:05:42 +0000 (02:05 +0000)
committer Kevin Qin <Kevin.Qin@arm.com>
Thu, 24 Jul 2014 02:05:42 +0000 (02:05 +0000)
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 02470304372600ca9495302517d8e4d0c36c7273..88ec06f778ef16295bb863561506a2cc3ea15f0a 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4180,9 +4180,22 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
    if (SourceVecs.size() > 2)
      return SDValue();
  
-  SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+  // Find out the smallest element size among result and two sources, and use
+  // it as element size to build the shuffle_vector.
+  EVT SmallestEltTy = VT.getVectorElementType();
+  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    EVT SrcEltTy = SourceVecs[i].getValueType().getVectorElementType();
+    if (SrcEltTy.bitsLT(SmallestEltTy)) {
+      SmallestEltTy = SrcEltTy;
+    }
+  }
+  unsigned ResMultiplier =
+      VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
    int VEXTOffsets[2] = { 0, 0 };
    int OffsetMultipliers[2] = { 1, 1 };
+  NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
+  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
+  SDValue ShuffleSrcs[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
  
    // This loop extracts the usage patterns of the source vectors
    // and prepares appropriate SDValues for a shuffle if possible.
@@ -4190,15 +4203,15 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
      unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
      SDValue CurSource = SourceVecs[i];
      if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType()) {
-      // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
-      // Then bitcast it to the vector which holds asserted element type,
-      // and record the multiplier of element width between SourceVecs and
-      // Build_vector which is needed to extract the correct lanes later.
-      EVT CastVT =
-          EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
-                           SourceVecs[i].getValueSizeInBits() /
-                               VT.getVectorElementType().getSizeInBits());
+        ShuffleVT.getVectorElementType()) {
+      // As ShuffleVT holds smallest element size, it may hit here only if
+      // the element type of SourceVecs is bigger than that of ShuffleVT.
+      // Adjust the element size of SourceVecs to match ShuffleVT, and record
+      // the multipliers.
+      EVT CastVT = EVT::getVectorVT(
+          *DAG.getContext(), ShuffleVT.getVectorElementType(),
+          SourceVecs[i].getValueSizeInBits() /
+              ShuffleVT.getVectorElementType().getSizeInBits());
  
        CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
        OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
@@ -4207,7 +4220,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
        MinElts[i] *= OffsetMultipliers[i];
      }
  
-    if (CurSource.getValueType() == VT) {
+    if (CurSource.getValueType() == ShuffleVT) {
        // No VEXT necessary
        ShuffleSrcs[i] = CurSource;
        VEXTOffsets[i] = 0;
@@ -4215,8 +4228,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
      } else if (NumSrcElts < NumElts) {
        // We can pad out the smaller vector for free, so if it's part of a
        // shuffle...
-      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
-                                   DAG.getUNDEF(CurSource.getValueType()));
+      ShuffleSrcs[i] =
+          DAG.getNode(ISD::CONCAT_VECTORS, dl, ShuffleVT, CurSource,
+                      DAG.getUNDEF(CurSource.getValueType()));
        continue;
      }
  
@@ -4233,50 +4247,61 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
      if (MinElts[i] >= NumElts) {
        // The extraction can just take the second half
        VEXTOffsets[i] = NumElts;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                   DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                   CurSource, DAG.getIntPtrConstant(NumElts));
      } else if (MaxElts[i] < NumElts) {
        // The extraction can just take the first half
        VEXTOffsets[i] = 0;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                   DAG.getIntPtrConstant(0));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                   CurSource, DAG.getIntPtrConstant(0));
      } else {
        // An actual VEXT is needed
        VEXTOffsets[i] = MinElts[i];
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                     DAG.getIntPtrConstant(0));
-      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
-                                     DAG.getIntPtrConstant(NumElts));
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                     CurSource, DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ShuffleVT,
+                                     CurSource, DAG.getIntPtrConstant(NumElts));
        unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
-      ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
-                                   DAG.getConstant(Imm, MVT::i32));
+      ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, ShuffleVT, VEXTSrc1,
+                                   VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
      }
    }
  
    SmallVector<int, 8> Mask;
+  unsigned VTEltSize = VT.getVectorElementType().getSizeInBits();
  
-  for (unsigned i = 0; i < NumElts; ++i) {
+  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
      SDValue Entry = Op.getOperand(i);
-    if (Entry.getOpcode() == ISD::UNDEF) {
-      Mask.push_back(-1);
-      continue;
+    int SourceNum = 1;
+    unsigned LanePartNum = 0;
+    int ExtractElt;
+    if (Entry.getOpcode() != ISD::UNDEF) {
+      // Check how many parts of source lane should be inserted.
+      SDValue ExtractVec = Entry.getOperand(0);
+      if (ExtractVec == SourceVecs[0])
+        SourceNum = 0;
+      ExtractElt = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
+      unsigned ExtEltSize =
+          ExtractVec.getValueType().getVectorElementType().getSizeInBits();
+      unsigned SmallerSize = ExtEltSize < VTEltSize ? ExtEltSize : VTEltSize;
+      LanePartNum = SmallerSize / SmallestEltTy.getSizeInBits();
      }
  
-    SDValue ExtractVec = Entry.getOperand(0);
-    int ExtractElt =
-        cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
-    if (ExtractVec == SourceVecs[0]) {
-      Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
-    } else {
-      Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
-                     VEXTOffsets[1]);
+    for (unsigned j = 0; j != ResMultiplier; ++j) {
+      if (j < LanePartNum)
+        Mask.push_back(ExtractElt * OffsetMultipliers[SourceNum] +
+                       NumElts * SourceNum - VEXTOffsets[SourceNum] + j);
+      else
+        Mask.push_back(-1);
      }
    }
  
    // Final check before we try to produce nonsense...
-  if (isShuffleMaskLegal(Mask, VT))
-    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
-                                &Mask[0]);
+  if (isShuffleMaskLegal(Mask, ShuffleVT)) {
+    SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleSrcs[0],
+                                           ShuffleSrcs[1], &Mask[0]);
+    return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+  }
  
    return SDValue();
  }
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll

index 4f8571db7480acb48b37f87d727e1acf5a200328..41e391dcd76c386d20ebd865abca87f7c3ae54e8 100644 (file)
--- a/test/CodeGen/AArch64/neon-perm.ll
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -1387,6 +1387,13 @@ entry:
    ret <8 x i16> %shuffle.i
  }
  
+define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
+; CHECK-LABEL: test_vzip1_v4i8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i8> %lo
+}
+
  define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
  ; CHECK-LABEL: test_same_vzip2_s8:
  ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
author	Kevin Qin <Kevin.Qin@arm.com>
	Thu, 24 Jul 2014 02:05:42 +0000 (02:05 +0000)
committer	Kevin Qin <Kevin.Qin@arm.com>
	Thu, 24 Jul 2014 02:05:42 +0000 (02:05 +0000)
lib/Target/AArch64/AArch64ISelLowering.cpp		patch \| blob \| history
test/CodeGen/AArch64/neon-perm.ll		patch \| blob \| history