(VPBROADCASTQrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VPBROADCASTQYrm addr:$src)>;
+
+ // Provide fallback in case the load node that is used in the patterns above
+ // is used by additional users, which prevents the pattern selection.
+ let AddedComplexity = 20 in {
+ def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSrr
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
+ (VBROADCASTSSYrr
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
+ def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
+ (VBROADCASTSDrr
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
+ }
}
// AVX1 broadcast patterns
(VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDrm addr:$src)>;
-
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
+
+ // Provide fallback in case the load node that is used in the patterns above
+ // is used by additional users, which prevents the pattern selection.
+ let AddedComplexity = 20 in {
+ // 128bit broadcasts:
+ def : Pat<(v2f64 (X86VBroadcast FR64:$src)),
+ (VPSHUFDri
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), 0)>;
+ def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
+ (VPSHUFDri
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>;
+ def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ (VPSHUFDri
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0),
+ sub_xmm),
+ (VPSHUFDri
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
+ 0), 1)>;
+ def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ (VPSHUFDri
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd), 0),
+ sub_xmm),
+ (VPSHUFDri
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
+ 0), 1)>;
+ }
}
//===----------------------------------------------------------------------===//
ret <8 x i32> %g
}
+; CHECK: V113
+; CHECK: vbroadcastss
+; CHECK: ret
+define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
+entry:
+ %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
+ ret <8 x float> %g
+}
+
; CHECK: _e2
; CHECK: vbroadcastss
; CHECK: ret
%vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
%vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
%vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
- %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
- %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
- %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
- %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+ %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 4
+ %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 5
+ %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 6
+ %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 7
ret <8 x i8> %vecinit7.i
}
+
+
+define void @crash() nounwind alwaysinline {
+WGLoopsEntry:
+ br i1 undef, label %ret, label %footer329VF
+
+footer329VF:
+ %A.0.inVF = fmul float undef, 6.553600e+04
+ %B.0.in407VF = fmul <8 x float> undef, <float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04, float 6.553600e+04>
+ %A.0VF = fptosi float %A.0.inVF to i32
+ %B.0408VF = fptosi <8 x float> %B.0.in407VF to <8 x i32>
+ %0 = and <8 x i32> %B.0408VF, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+ %1 = and i32 %A.0VF, 65535
+ %temp1098VF = insertelement <8 x i32> undef, i32 %1, i32 0
+ %vector1099VF = shufflevector <8 x i32> %temp1098VF, <8 x i32> undef, <8 x i32> zeroinitializer
+ br i1 undef, label %preload1201VF, label %footer349VF
+
+preload1201VF:
+ br label %footer349VF
+
+footer349VF:
+ %2 = mul nsw <8 x i32> undef, %0
+ %3 = mul nsw <8 x i32> undef, %vector1099VF
+ br label %footer329VF
+
+ret:
+ ret void
+}