def int_x86_avx2_vbroadcast_ss_ps_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_avx2_vbroadcasti128 :
- Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadArgMem]>;
def int_x86_avx2_pbroadcastb_128 :
GCCBuiltin<"__builtin_ia32_pbroadcastb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
Name == "x86.avx2.pblendw" ||
Name == "x86.avx2.pblendd.128" ||
Name == "x86.avx2.pblendd.256" ||
+ Name == "x86.avx2.vbroadcasti128" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
+ } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
+ // Replace vbroadcasts with a vector shuffle.
+ Value *Op = Builder.CreatePointerCast(
+ CI->getArgOperand(0),
+ PointerType::getUnqual(VectorType::get(Type::getInt64Ty(C), 2)));
+ Value *Load = Builder.CreateLoad(Op);
+ SmallVector<Constant *, 4> Idxs; // 0, 1, 0, 1.
+ for (unsigned i = 0; i != 4; ++i)
+ Idxs.push_back(Builder.getInt32(i & 1));
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ ConstantVector::get(Idxs));
} else if (Name == "llvm.x86.sse2.psll.dq") {
// 128-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
int_x86_avx2_vbroadcast_sd_pd_256,
WriteFShuffle256>, VEX_L;
-let Predicates = [HasAVX2] in
-def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
- int_x86_avx2_vbroadcasti128, WriteLoad>,
- VEX_L;
-
let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
-define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
- ; CHECK: vbroadcasti128
- %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
-
define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
; CHECK: vbroadcastsd
%res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
--- /dev/null
+; RUN: llc -mattr=+avx2 < %s | FileCheck %s
+
+; Check that we properly upgrade the AVX2 vbroadcast intrinsic to IR.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define <4 x i64> @broadcast128(<2 x i64> %src) {
+ CHECK-LABEL: broadcast128
+ CHECK: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+ %1 = alloca <2 x i64>, align 16
+ %2 = bitcast <2 x i64>* %1 to i8*
+ store <2 x i64> %src, <2 x i64>* %1, align 16
+ %3 = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %2)
+ ret <4 x i64> %3
+}
+
+declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) #1