From: Simon Pilgrim Date: Sun, 11 Oct 2015 17:34:32 +0000 (+0000) Subject: [X86] Renamed SHL cost model tests X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=6bb2edf33ac4781689b8b059b5e8a8d571e77227;p=oota-llvm.git [X86] Renamed SHL cost model tests Matches naming conventions for ASHR/LSHR cost tests As discussed in D8690. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@249984 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/Analysis/CostModel/X86/vshift-cost.ll b/test/Analysis/CostModel/X86/vshift-cost.ll deleted file mode 100644 index 5a0c86cdc7c..00000000000 --- a/test/Analysis/CostModel/X86/vshift-cost.ll +++ /dev/null @@ -1,182 +0,0 @@ -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 - - -; Verify the cost of vector shift left instructions. - -; We always emit a single pmullw in the case of v8i16 vector shifts by -; non-uniform constant. - -define <8 x i16> @test1(<8 x i16> %a) { - %shl = shl <8 x i16> %a, - ret <8 x i16> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test1': -; CHECK: Found an estimated cost of 1 for instruction: %shl - - -define <8 x i16> @test2(<8 x i16> %a) { - %shl = shl <8 x i16> %a, - ret <8 x i16> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test2': -; CHECK: Found an estimated cost of 1 for instruction: %shl - - -; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction. -; Make sure that the estimated cost is always 1 except for the case where -; we only have SSE2 support. With SSE2, we are forced to special lower the -; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle. - -define <4 x i32> @test3(<4 x i32> %a) { - %shl = shl <4 x i32> %a, - ret <4 x i32> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test3': -; SSE2: Found an estimated cost of 6 for instruction: %shl -; SSE41: Found an estimated cost of 1 for instruction: %shl -; AVX: Found an estimated cost of 1 for instruction: %shl -; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOP: Found an estimated cost of 1 for instruction: %shl - - -define <4 x i32> @test4(<4 x i32> %a) { - %shl = shl <4 x i32> %a, - ret <4 x i32> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test4': -; SSE2: Found an estimated cost of 6 for instruction: %shl -; SSE41: Found an estimated cost of 1 for instruction: %shl -; AVX: Found an estimated cost of 1 for instruction: %shl -; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOP: Found an estimated cost of 1 for instruction: %shl - - -; On AVX2 we are able to lower the following shift into a single -; vpsllvq. Therefore, the expected cost is only 1. -; In all other cases, this shift is scalarized as the target does not support -; vpsllv instructions. - -define <2 x i64> @test5(<2 x i64> %a) { - %shl = shl <2 x i64> %a, - ret <2 x i64> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test5': -; SSE2: Found an estimated cost of 4 for instruction: %shl -; SSE41: Found an estimated cost of 4 for instruction: %shl -; AVX: Found an estimated cost of 4 for instruction: %shl -; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOP: Found an estimated cost of 1 for instruction: %shl - - -; v16i16 and v8i32 shift left by non-uniform constant are lowered into -; vector multiply instructions. With AVX (but not AVX2), the vector multiply -; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert. -; -; With AVX2, instruction vpmullw works with 256bit quantities and -; therefore there is no need to split the resulting vector multiply into -; a sequence of two multiply. -; -; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice -; the cost computed in the case of 'test1'. That is because the backend -; simply emits 2 pmullw with no extract/insert. - - -define <16 x i16> @test6(<16 x i16> %a) { - %shl = shl <16 x i16> %a, - ret <16 x i16> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test6': -; SSE2: Found an estimated cost of 2 for instruction: %shl -; SSE41: Found an estimated cost of 2 for instruction: %shl -; AVX: Found an estimated cost of 4 for instruction: %shl -; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOPAVX: Found an estimated cost of 2 for instruction: %shl -; XOPAVX2: Found an estimated cost of 1 for instruction: %shl - - -; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice -; the cost computed in the case of 'test3'. That is because the multiply -; is type-legalized into two 4i32 vector multiply. - -define <8 x i32> @test7(<8 x i32> %a) { - %shl = shl <8 x i32> %a, - ret <8 x i32> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test7': -; SSE2: Found an estimated cost of 12 for instruction: %shl -; SSE41: Found an estimated cost of 2 for instruction: %shl -; AVX: Found an estimated cost of 4 for instruction: %shl -; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOPAVX: Found an estimated cost of 2 for instruction: %shl -; XOPAVX2: Found an estimated cost of 1 for instruction: %shl - - -; On AVX2 we are able to lower the following shift into a single -; vpsllvq. Therefore, the expected cost is only 1. -; In all other cases, this shift is scalarized as the target does not support -; vpsllv instructions. - -define <4 x i64> @test8(<4 x i64> %a) { - %shl = shl <4 x i64> %a, - ret <4 x i64> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test8': -; SSE2: Found an estimated cost of 8 for instruction: %shl -; SSE41: Found an estimated cost of 8 for instruction: %shl -; AVX: Found an estimated cost of 8 for instruction: %shl -; AVX2: Found an estimated cost of 1 for instruction: %shl -; XOPAVX: Found an estimated cost of 2 for instruction: %shl -; XOPAVX2: Found an estimated cost of 1 for instruction: %shl - - -; Same as 'test6', with the difference that the cost is double. - -define <32 x i16> @test9(<32 x i16> %a) { - %shl = shl <32 x i16> %a, - ret <32 x i16> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test9': -; SSE2: Found an estimated cost of 4 for instruction: %shl -; SSE41: Found an estimated cost of 4 for instruction: %shl -; AVX: Found an estimated cost of 8 for instruction: %shl -; AVX2: Found an estimated cost of 2 for instruction: %shl -; XOPAVX: Found an estimated cost of 4 for instruction: %shl -; XOPAVX2: Found an estimated cost of 2 for instruction: %shl - - -; Same as 'test7', except that now the cost is double. - -define <16 x i32> @test10(<16 x i32> %a) { - %shl = shl <16 x i32> %a, - ret <16 x i32> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test10': -; SSE2: Found an estimated cost of 24 for instruction: %shl -; SSE41: Found an estimated cost of 4 for instruction: %shl -; AVX: Found an estimated cost of 8 for instruction: %shl -; AVX2: Found an estimated cost of 2 for instruction: %shl -; XOPAVX: Found an estimated cost of 4 for instruction: %shl -; XOPAVX2: Found an estimated cost of 2 for instruction: %shl - - -; On AVX2 we are able to lower the following shift into a sequence of -; two vpsllvq instructions. Therefore, the expected cost is only 2. -; In all other cases, this shift is scalarized as we don't have vpsllv -; instructions. - -define <8 x i64> @test11(<8 x i64> %a) { - %shl = shl <8 x i64> %a, - ret <8 x i64> %shl -} -; CHECK: 'Cost Model Analysis' for function 'test11': -; SSE2: Found an estimated cost of 16 for instruction: %shl -; SSE41: Found an estimated cost of 16 for instruction: %shl -; AVX: Found an estimated cost of 16 for instruction: %shl -; AVX2: Found an estimated cost of 2 for instruction: %shl -; XOPAVX: Found an estimated cost of 4 for instruction: %shl -; XOPAVX2: Found an estimated cost of 2 for instruction: %shl diff --git a/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/test/Analysis/CostModel/X86/vshift-shl-cost.ll new file mode 100644 index 00000000000..5a0c86cdc7c --- /dev/null +++ b/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -0,0 +1,182 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2,-sse4.1 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=SSE41 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver4 -cost-model -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=XOP -check-prefix=XOPAVX2 + + +; Verify the cost of vector shift left instructions. + +; We always emit a single pmullw in the case of v8i16 vector shifts by +; non-uniform constant. + +define <8 x i16> @test1(<8 x i16> %a) { + %shl = shl <8 x i16> %a, + ret <8 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test1': +; CHECK: Found an estimated cost of 1 for instruction: %shl + + +define <8 x i16> @test2(<8 x i16> %a) { + %shl = shl <8 x i16> %a, + ret <8 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test2': +; CHECK: Found an estimated cost of 1 for instruction: %shl + + +; With SSE4.1, v4i32 shifts can be lowered into a single pmulld instruction. +; Make sure that the estimated cost is always 1 except for the case where +; we only have SSE2 support. With SSE2, we are forced to special lower the +; v4i32 mul as a 2x shuffle, 2x pmuludq, 2x shuffle. + +define <4 x i32> @test3(<4 x i32> %a) { + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test3': +; SSE2: Found an estimated cost of 6 for instruction: %shl +; SSE41: Found an estimated cost of 1 for instruction: %shl +; AVX: Found an estimated cost of 1 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl +; XOP: Found an estimated cost of 1 for instruction: %shl + + +define <4 x i32> @test4(<4 x i32> %a) { + %shl = shl <4 x i32> %a, + ret <4 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test4': +; SSE2: Found an estimated cost of 6 for instruction: %shl +; SSE41: Found an estimated cost of 1 for instruction: %shl +; AVX: Found an estimated cost of 1 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl +; XOP: Found an estimated cost of 1 for instruction: %shl + + +; On AVX2 we are able to lower the following shift into a single +; vpsllvq. Therefore, the expected cost is only 1. +; In all other cases, this shift is scalarized as the target does not support +; vpsllv instructions. + +define <2 x i64> @test5(<2 x i64> %a) { + %shl = shl <2 x i64> %a, + ret <2 x i64> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test5': +; SSE2: Found an estimated cost of 4 for instruction: %shl +; SSE41: Found an estimated cost of 4 for instruction: %shl +; AVX: Found an estimated cost of 4 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl +; XOP: Found an estimated cost of 1 for instruction: %shl + + +; v16i16 and v8i32 shift left by non-uniform constant are lowered into +; vector multiply instructions. With AVX (but not AVX2), the vector multiply +; is lowered into a sequence of: 1 extract + 2 vpmullw + 1 insert. +; +; With AVX2, instruction vpmullw works with 256bit quantities and +; therefore there is no need to split the resulting vector multiply into +; a sequence of two multiply. +; +; With SSE2 and SSE4.1, the vector shift cost for 'test6' is twice +; the cost computed in the case of 'test1'. That is because the backend +; simply emits 2 pmullw with no extract/insert. + + +define <16 x i16> @test6(<16 x i16> %a) { + %shl = shl <16 x i16> %a, + ret <16 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test6': +; SSE2: Found an estimated cost of 2 for instruction: %shl +; SSE41: Found an estimated cost of 2 for instruction: %shl +; AVX: Found an estimated cost of 4 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl +; XOPAVX: Found an estimated cost of 2 for instruction: %shl +; XOPAVX2: Found an estimated cost of 1 for instruction: %shl + + +; With SSE2 and SSE4.1, the vector shift cost for 'test7' is twice +; the cost computed in the case of 'test3'. That is because the multiply +; is type-legalized into two 4i32 vector multiply. + +define <8 x i32> @test7(<8 x i32> %a) { + %shl = shl <8 x i32> %a, + ret <8 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test7': +; SSE2: Found an estimated cost of 12 for instruction: %shl +; SSE41: Found an estimated cost of 2 for instruction: %shl +; AVX: Found an estimated cost of 4 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl +; XOPAVX: Found an estimated cost of 2 for instruction: %shl +; XOPAVX2: Found an estimated cost of 1 for instruction: %shl + + +; On AVX2 we are able to lower the following shift into a single +; vpsllvq. Therefore, the expected cost is only 1. +; In all other cases, this shift is scalarized as the target does not support +; vpsllv instructions. + +define <4 x i64> @test8(<4 x i64> %a) { + %shl = shl <4 x i64> %a, + ret <4 x i64> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test8': +; SSE2: Found an estimated cost of 8 for instruction: %shl +; SSE41: Found an estimated cost of 8 for instruction: %shl +; AVX: Found an estimated cost of 8 for instruction: %shl +; AVX2: Found an estimated cost of 1 for instruction: %shl +; XOPAVX: Found an estimated cost of 2 for instruction: %shl +; XOPAVX2: Found an estimated cost of 1 for instruction: %shl + + +; Same as 'test6', with the difference that the cost is double. + +define <32 x i16> @test9(<32 x i16> %a) { + %shl = shl <32 x i16> %a, + ret <32 x i16> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test9': +; SSE2: Found an estimated cost of 4 for instruction: %shl +; SSE41: Found an estimated cost of 4 for instruction: %shl +; AVX: Found an estimated cost of 8 for instruction: %shl +; AVX2: Found an estimated cost of 2 for instruction: %shl +; XOPAVX: Found an estimated cost of 4 for instruction: %shl +; XOPAVX2: Found an estimated cost of 2 for instruction: %shl + + +; Same as 'test7', except that now the cost is double. + +define <16 x i32> @test10(<16 x i32> %a) { + %shl = shl <16 x i32> %a, + ret <16 x i32> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test10': +; SSE2: Found an estimated cost of 24 for instruction: %shl +; SSE41: Found an estimated cost of 4 for instruction: %shl +; AVX: Found an estimated cost of 8 for instruction: %shl +; AVX2: Found an estimated cost of 2 for instruction: %shl +; XOPAVX: Found an estimated cost of 4 for instruction: %shl +; XOPAVX2: Found an estimated cost of 2 for instruction: %shl + + +; On AVX2 we are able to lower the following shift into a sequence of +; two vpsllvq instructions. Therefore, the expected cost is only 2. +; In all other cases, this shift is scalarized as we don't have vpsllv +; instructions. + +define <8 x i64> @test11(<8 x i64> %a) { + %shl = shl <8 x i64> %a, + ret <8 x i64> %shl +} +; CHECK: 'Cost Model Analysis' for function 'test11': +; SSE2: Found an estimated cost of 16 for instruction: %shl +; SSE41: Found an estimated cost of 16 for instruction: %shl +; AVX: Found an estimated cost of 16 for instruction: %shl +; AVX2: Found an estimated cost of 2 for instruction: %shl +; XOPAVX: Found an estimated cost of 4 for instruction: %shl +; XOPAVX2: Found an estimated cost of 2 for instruction: %shl