From: Simon Pilgrim Date: Sat, 18 Jul 2015 20:06:30 +0000 (+0000) Subject: [X86][SSE] Updated SHL/LSHR i64 vectorization costs. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9549a0c0bcaf851f0cb1deffb215a5b5c55e6faf;p=oota-llvm.git [X86][SSE] Updated SHL/LSHR i64 vectorization costs. This was missed in D8416. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242621 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 7df72609184..7cda54d6b00 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -259,13 +259,13 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. - { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized. - { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized. + { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v4i64, 8 }, // splat+shuffle sequence. { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized. + { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll index 5775a42d08a..52f176fe4d6 100644 --- a/test/Analysis/CostModel/X86/testshiftlshr.ll +++ b/test/Analysis/CostModel/X86/testshiftlshr.ll @@ -5,7 +5,7 @@ define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { entry: ; SSE2: shift2i16 - ; SSE2: cost of 20 {{.*}} lshr + ; SSE2: cost of 4 {{.*}} lshr ; SSE2-CODEGEN: shift2i16 ; SSE2-CODEGEN: psrlq @@ -65,7 +65,7 @@ entry: define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { entry: ; SSE2: shift2i32 - ; SSE2: cost of 20 {{.*}} lshr + ; SSE2: cost of 4 {{.*}} lshr ; SSE2-CODEGEN: shift2i32 ; SSE2-CODEGEN: psrlq @@ -125,7 +125,7 @@ entry: define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) { entry: ; SSE2: shift2i64 - ; SSE2: cost of 20 {{.*}} lshr + ; SSE2: cost of 4 {{.*}} lshr ; SSE2-CODEGEN: shift2i64 ; SSE2-CODEGEN: psrlq @@ -137,7 +137,7 @@ entry: define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) { entry: ; SSE2: shift4i64 - ; SSE2: cost of 40 {{.*}} lshr + ; SSE2: cost of 8 {{.*}} lshr ; SSE2-CODEGEN: shift4i64 ; SSE2-CODEGEN: psrlq @@ -149,7 +149,7 @@ entry: define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) { entry: ; SSE2: shift8i64 - ; SSE2: cost of 80 {{.*}} lshr + ; SSE2: cost of 16 {{.*}} lshr ; SSE2-CODEGEN: shift8i64 ; SSE2-CODEGEN: psrlq @@ -161,7 +161,7 @@ entry: define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) { entry: ; SSE2: shift16i64 - ; SSE2: cost of 160 {{.*}} lshr + ; SSE2: cost of 32 {{.*}} lshr ; SSE2-CODEGEN: shift16i64 ; SSE2-CODEGEN: psrlq @@ -173,7 +173,7 @@ entry: define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { entry: ; SSE2: shift32i64 - ; SSE2: cost of 320 {{.*}} lshr + ; SSE2: cost of 64 {{.*}} lshr ; SSE2-CODEGEN: shift32i64 ; SSE2-CODEGEN: psrlq @@ -185,7 +185,7 @@ entry: define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { entry: ; SSE2: shift2i8 - ; SSE2: cost of 20 {{.*}} lshr + ; SSE2: cost of 4 {{.*}} lshr ; SSE2-CODEGEN: shift2i8 ; SSE2-CODEGEN: psrlq diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll index d4e33818932..e385c5bfeea 100644 --- a/test/Analysis/CostModel/X86/testshiftshl.ll +++ b/test/Analysis/CostModel/X86/testshiftshl.ll @@ -5,7 +5,7 @@ define %shifttype @shift2i16(%shifttype %a, %shifttype %b) { entry: ; SSE2: shift2i16 - ; SSE2: cost of 20 {{.*}} shl + ; SSE2: cost of 4 {{.*}} shl ; SSE2-CODEGEN: shift2i16 ; SSE2-CODEGEN: psllq @@ -65,7 +65,7 @@ entry: define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) { entry: ; SSE2: shift2i32 - ; SSE2: cost of 20 {{.*}} shl + ; SSE2: cost of 4 {{.*}} shl ; SSE2-CODEGEN: shift2i32 ; SSE2-CODEGEN: psllq @@ -125,7 +125,7 @@ entry: define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) { entry: ; SSE2: shift2i64 - ; SSE2: cost of 20 {{.*}} shl + ; SSE2: cost of 4 {{.*}} shl ; SSE2-CODEGEN: shift2i64 ; SSE2-CODEGEN: psllq @@ -137,7 +137,7 @@ entry: define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) { entry: ; SSE2: shift4i64 - ; SSE2: cost of 40 {{.*}} shl + ; SSE2: cost of 8 {{.*}} shl ; SSE2-CODEGEN: shift4i64 ; SSE2-CODEGEN: psllq @@ -149,7 +149,7 @@ entry: define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) { entry: ; SSE2: shift8i64 - ; SSE2: cost of 80 {{.*}} shl + ; SSE2: cost of 16 {{.*}} shl ; SSE2-CODEGEN: shift8i64 ; SSE2-CODEGEN: psllq @@ -161,7 +161,7 @@ entry: define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) { entry: ; SSE2: shift16i64 - ; SSE2: cost of 160 {{.*}} shl + ; SSE2: cost of 32 {{.*}} shl ; SSE2-CODEGEN: shift16i64 ; SSE2-CODEGEN: psllq @@ -173,7 +173,7 @@ entry: define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) { entry: ; SSE2: shift32i64 - ; SSE2: cost of 320 {{.*}} shl + ; SSE2: cost of 64 {{.*}} shl ; SSE2-CODEGEN: shift32i64 ; SSE2-CODEGEN: psllq @@ -185,7 +185,7 @@ entry: define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) { entry: ; SSE2: shift2i8 - ; SSE2: cost of 20 {{.*}} shl + ; SSE2: cost of 4 {{.*}} shl ; SSE2-CODEGEN: shift2i8 ; SSE2-CODEGEN: psllq diff --git a/test/Analysis/CostModel/X86/vshift-cost.ll b/test/Analysis/CostModel/X86/vshift-cost.ll index 84d72463ac0..dd93badc893 100644 --- a/test/Analysis/CostModel/X86/vshift-cost.ll +++ b/test/Analysis/CostModel/X86/vshift-cost.ll @@ -62,9 +62,9 @@ define <2 x i64> @test5(<2 x i64> %a) { ret <2 x i64> %shl } ; CHECK: 'Cost Model Analysis' for function 'test5': -; SSE2: Found an estimated cost of 20 for instruction: %shl -; SSE41: Found an estimated cost of 20 for instruction: %shl -; AVX: Found an estimated cost of 20 for instruction: %shl +; SSE2: Found an estimated cost of 4 for instruction: %shl +; SSE41: Found an estimated cost of 4 for instruction: %shl +; AVX: Found an estimated cost of 4 for instruction: %shl ; AVX2: Found an estimated cost of 1 for instruction: %shl @@ -117,9 +117,9 @@ define <4 x i64> @test8(<4 x i64> %a) { ret <4 x i64> %shl } ; CHECK: 'Cost Model Analysis' for function 'test8': -; SSE2: Found an estimated cost of 40 for instruction: %shl -; SSE41: Found an estimated cost of 40 for instruction: %shl -; AVX: Found an estimated cost of 40 for instruction: %shl +; SSE2: Found an estimated cost of 8 for instruction: %shl +; SSE41: Found an estimated cost of 8 for instruction: %shl +; AVX: Found an estimated cost of 8 for instruction: %shl ; AVX2: Found an estimated cost of 1 for instruction: %shl @@ -159,9 +159,9 @@ define <8 x i64> @test11(<8 x i64> %a) { ret <8 x i64> %shl } ; CHECK: 'Cost Model Analysis' for function 'test11': -; SSE2: Found an estimated cost of 80 for instruction: %shl -; SSE41: Found an estimated cost of 80 for instruction: %shl -; AVX: Found an estimated cost of 80 for instruction: %shl +; SSE2: Found an estimated cost of 16 for instruction: %shl +; SSE41: Found an estimated cost of 16 for instruction: %shl +; AVX: Found an estimated cost of 16 for instruction: %shl ; AVX2: Found an estimated cost of 2 for instruction: %shl