From 886f0101a7d176543b831f5efb74c03427244a55 Mon Sep 17 00:00:00 2001
From: Chandler Carruth
Date: Fri, 15 Aug 2014 03:54:49 +0000
Subject: [PATCH] [x86] Fix the very broken formation of vpunpck instructions
 in the target-specific shuffle DAG combines.

We were recognizing the paired shuffles backwards. This code needs to be
replaced anyway as we have the same functionality elsewhere, but I'll do the
refactoring in a follow-up; this is the minimal fix to the behavior.

In addition to fixing miscompiles with the new vector shuffle lowering, it
also causes the canonicalization to kick in much better, selecting the
smaller encoding variants in lots of places in the new AVX path.

This still isn't quite ideal as we don't need both the shufpd and the punpck
instructions, but that'll get fixed in a follow-up patch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215690 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp        |  2 +-
 test/CodeGen/X86/avx-sext.ll              |  2 +-
 test/CodeGen/X86/vector-shuffle-128-v4.ll |  9 ++++++++
 test/CodeGen/X86/vector-shuffle-256-v4.ll | 26 +++++++++++------------
 4 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c2f284496d5..5143f19c763 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19496,7 +19496,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
   while (Mask.size() > 1) {
     SmallVector NewMask;
     for (int i = 0, e = Mask.size()/2; i < e; ++i) {
-      if (Mask[2*i] % 2 != 0 || Mask[2*i] != Mask[2*i + 1] + 1) {
+      if (Mask[2*i] % 2 != 0 || Mask[2*i] + 1 != Mask[2*i + 1]) {
         NewMask.clear();
         break;
       }
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
index fb2287f5289..9bcf06f7b32 100644
--- a/test/CodeGen/X86/avx-sext.ll
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -156,7 +156,7 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
 
 ; AVX-LABEL: sext_16i8_to_16i16
 ; AVX: vpmovsxbw
-; AVX: vmovhlps
+; AVX: vpunpckhqdq
 ; AVX: vpmovsxbw
 ; AVX: ret
 define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 210d672b5c0..3b7c146dd36 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-AVX1
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
 
@@ -60,6 +61,14 @@ define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %shuffle
 }
 
+define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-AVX1-LABEL: @shuffle_v4i32_2121
+; CHECK-AVX1: vpshufd {{.*}} # xmm0 = xmm0[2,1,2,1]
+; CHECK-AVX1-NEXT: retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
+  ret <4 x i32> %shuffle
+}
+
 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SSE2-LABEL: @shuffle_v4f32_0001
 ; CHECK-SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
index d26e1fbb0cb..ac441e9b77f 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-unknown"
 define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0001
 ; AVX1: # BB#0:
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
@@ -18,7 +18,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
@@ -41,7 +41,7 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
@@ -52,7 +52,7 @@ define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_1000
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
@@ -63,8 +63,8 @@ define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_2200
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
@@ -76,7 +76,7 @@ define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
+; AVX1-NEXT: vpunpckhqdq {{.*}} # xmm1 = xmm1[1,1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: retq
   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
@@ -174,7 +174,7 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0124
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm2[0],xmm1[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -185,7 +185,7 @@ define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: @shuffle_v4i64_0142
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm2[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm2 = xmm2[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -197,7 +197,7 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -209,7 +209,7 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm0[1],xmm2[0]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -229,7 +229,7 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm1[2,3,0,1]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm0[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm1 = xmm1[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm1 = xmm1[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
@@ -249,7 +249,7 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1: # BB#0:
 ; AVX1-NEXT: vpshufd {{.*}} # xmm2 = xmm0[2,3,0,1]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; AVX1-NEXT: vpshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: vpunpcklqdq {{.*}} # xmm0 = xmm0[0,0]
 ; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[0],xmm0[1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT: retq
-- 
2.34.1
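
For readers skimming the patch, the one-line change in combineX86ShufflesRecursively is the whole fix: when the combine tries to widen a shuffle mask (for example, treating a v4i32 mask as a v2i64 mask so it can form vpunpcklqdq/vpunpckhqdq), each pair of narrow indices must start at an even element and be consecutive in increasing order; the old condition accepted the reversed pairing instead. The following standalone C++ sketch is illustration only, not the LLVM code itself: the helper name widenShuffleMask and the small driver are invented here to show the corrected check and how a v4i32 mask such as {0,1,0,1} widens to the v2i64 mask {0,0} that the updated FileCheck lines print as xmm0[0,0].

```cpp
#include <cstdio>
#include <vector>

// Sketch of the mask-widening check this patch fixes: a pair of narrow
// shuffle indices folds into one wider index only when the first index is
// even and the second index is exactly the first index plus one.
static std::vector<int> widenShuffleMask(const std::vector<int> &Mask) {
  std::vector<int> NewMask;
  for (size_t i = 0, e = Mask.size() / 2; i != e; ++i) {
    // Corrected condition: Mask[2*i] is even and Mask[2*i] + 1 == Mask[2*i + 1].
    // The broken form tested Mask[2*i] == Mask[2*i + 1] + 1, i.e. it matched
    // the pairs backwards ({1,0} instead of {0,1}).
    if (Mask[2 * i] % 2 != 0 || Mask[2 * i] + 1 != Mask[2 * i + 1])
      return {}; // Not foldable into wider elements.
    NewMask.push_back(Mask[2 * i] / 2);
  }
  return NewMask;
}

int main() {
  // {0,1,0,1} on v4i32 widens to {0,0} on v2i64 (a vpunpcklqdq of a register
  // with itself); {1,0,3,2} pairs the elements backwards and does not widen.
  std::vector<std::vector<int>> Tests = {{0, 1, 0, 1}, {1, 0, 3, 2}};
  for (const std::vector<int> &M : Tests) {
    std::vector<int> Wide = widenShuffleMask(M);
    if (Wide.empty())
      std::printf("mask does not widen\n");
    else
      std::printf("mask widens to %zu elements\n", Wide.size());
  }
  return 0;
}
```

With the backwards check, masks that should have widened were rejected and reversed pairs were accepted, which is why the test updates above both swap vpshufd patterns for the smaller vpunpcklqdq/vpunpckhqdq encodings and fix a miscompile in avx-sext.ll.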