return SDValue();
}
- if (VT.isVector()) {
- auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) {
+ if (VT.isVector() && Subtarget->hasSSE2()) {
+ auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) {
EVT InVT = N.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(),
- 128 / InVT.getScalarSizeInBits());
- SmallVector<SDValue, 8> Opnds(128 / InVT.getSizeInBits(),
+ Size / InVT.getScalarSizeInBits());
+ SmallVector<SDValue, 8> Opnds(Size / InVT.getSizeInBits(),
DAG.getUNDEF(InVT));
Opnds[0] = N;
return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds);
};
+ // If target-size is less than 128-bits, extend to a type that would extend
+ // to 128 bits, extend that and extract the original target vector.
+ if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) &&
+ (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
+ (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
+ unsigned Scale = 128 / VT.getSizeInBits();
+ EVT ExVT =
+ EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits());
+ SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits());
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt,
+ DAG.getIntPtrConstant(0, DL));
+ }
+
// If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
// which ensures lowering to X86ISD::VSEXT (pmovsx*).
if (VT.getSizeInBits() == 128 &&
(SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) &&
(InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) {
- SDValue ExOp = ExtendToVec128(DL, N0);
+ SDValue ExOp = ExtendVecSize(DL, N0, 128);
return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
}
++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
- SrcVec = ExtendToVec128(DL, SrcVec);
+ SrcVec = ExtendVecSize(DL, SrcVec, 128);
SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT);
Opnds.push_back(SrcVec);
}
ret <4 x i64>%B
}
+define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_2i8_to_i32:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: psraw $8, %xmm0
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: sext_2i8_to_i32:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: psraw $8, %xmm0
+; SSSE3-NEXT: movd %xmm0, %eax
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: sext_2i8_to_i32:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
+; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: sext_2i8_to_i32:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: retq
+;
+; X32-SSE41-LABEL: sext_2i8_to_i32:
+; X32-SSE41: # BB#0: # %entry
+; X32-SSE41: pmovsxbw %xmm0, %xmm0
+; X32-SSE41-NEXT: movd %xmm0, %eax
+; X32-SSE41-NEXT: popl %edx
+; X32-SSE41-NEXT: retl
+entry:
+ %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
+ %Ex = sext <2 x i8> %Shuf to <2 x i16>
+ %Bc = bitcast <2 x i16> %Ex to i32
+ ret i32 %Bc
+}
+
define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
; SSE2-LABEL: load_sext_test1:
; SSE2: # BB#0: # %entry