SDValue RegSeq = createQTuple(Regs);
const EVT ResTys[] = {MVT::i64, // Type of the write back register
- MVT::Untyped, MVT::Other};
+ RegSeq->getValueType(0), MVT::Other};
unsigned LaneNo =
cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
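
With a single input vector, createQTuple does not build a REG_SEQUENCE at all, so RegSeq's value type is the plain vector type (e.g. v8i16) rather than MVT::Untyped; hardcoding Untyped in ResTys therefore mismatched the machine node's actual result type on the single-register path, which is why the hunk switches to RegSeq->getValueType(0). Below is a minimal sketch of that special case, modeled on createTuple in AArch64ISelDAGToDAG.cpp; createTupleSketch and the zeroed register-class/subregister IDs are illustrative placeholders (the real helper picks them per tuple size), and the usual SelectionDAG headers are assumed.

// Sketch: why RegSeq->getValueType(0) differs from MVT::Untyped when
// there is only one register. Placeholder IDs, not the in-tree code.
static SDValue createTupleSketch(SelectionDAG &DAG, const SDLoc &DL,
                                 ArrayRef<SDValue> Regs) {
  // A "vector list" of one element is just the vector itself, so the
  // result type is the vector type (v8i16, v4f32, ...), not Untyped.
  if (Regs.size() == 1)
    return Regs[0];

  // Two to four registers really are wrapped in a REG_SEQUENCE, whose
  // single result is MVT::Untyped.
  SmallVector<SDValue, 9> Ops;
  Ops.push_back(DAG.getTargetConstant(/*RegClassID=*/0, DL, MVT::i32));
  for (SDValue Reg : Regs) {
    Ops.push_back(Reg);
    Ops.push_back(DAG.getTargetConstant(/*SubRegIdx=*/0, DL, MVT::i32));
  }
  SDNode *Seq =
      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(Seq, 0);
}
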
store float* %tmp3, float** %ptr
ret <4 x float> %tmp2
}
+
+; Make sure that we test the narrow V64 code path.
+; The tests above don't, because their 64-bit insert_vector_elt nodes are
+; widened to 128-bit before the LD1LANEpost combine has a chance to run,
+; so the combine never sees narrow vector types.
+; One way to trick that combine into running early is to force the vector-op
+; legalizer to run; the ctpop call below achieves that.
+; PR23265
+define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
+; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
+; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
+ %tmp1 = load i16, i16* %bar
+ %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
+ %tmp3 = getelementptr i16, i16* %bar, i64 %inc
+ store i16* %tmp3, i16** %ptr
+ %dl = load <2 x i32>, <2 x i32>* %d
+ %dr = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %dl)
+ store <2 x i32> %dr, <2 x i32>* %d
+ ret <4 x i16> %tmp2
+}
+
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
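
For completeness: the CHECK line above uses Apple assembly syntax (ld1.h { v0 }[1]), so the test file's RUN line must pass -aarch64-neon-syntax=apple to llc. The file's actual RUN line sits outside this hunk; a representative one, assumed rather than quoted from the file, would be:

; Representative RUN line (assumed; the real one is not shown in this hunk):
; RUN: llc -march=arm64 -aarch64-neon-syntax=apple < %s | FileCheck %s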