//===----------------------------------------------------------------------===//
def CCC_SPU : CallingConv<[
CCIfType<[i8, i16, i32, i64, i128, f32, f64,
- v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64, v2i32],
CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
R12, R13, R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27, R28, R29,
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ // v2i32 arguments are passed in the 128-bit vector register class,
+ // just like the other vector types handled by this switch.
+ case MVT::v2i32:
ArgRegClass = &SPU::VECREGRegClass;
break;
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
case MVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
+ // No custom lowering for v2i32 splats; returning an empty SDValue hands
+ // the node back to the caller's default handling — presumably generic
+ // legalization expands it. NOTE(review): confirm this is intended.
+ return SDValue();
}
case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
} else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
maskVT = MVT::v8i16;
+ } else if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32 ) {
+ V2EltIdx0 = 2;
+ maskVT = MVT::v4i32;
} else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
maskVT = MVT::v4i32;
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
+ // For half vectors, pad the mask with zeros for the second half.
+ // This is needed because the mask is assumed to be a full vector
+ // elsewhere in the SPU backend.
+ if(VecVT == MVT::v2i32 || VecVT == MVT::v2f32)
+ for( unsigned i = 0; i < 2; ++i )
+ {
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(0,MVT::i8));
+ }
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ case MVT::v2i32: n_copies = 2; VT = MVT::i32; break;
}
SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
+ // NOTE(review): the 128-bit requirement was just dropped from the
+ // condition, but the message below still claims "128-bit" — and the two
+ // adjacent string literals concatenate to "128-bitvector" (missing
+ // space). The message text should be updated in a follow-up.
report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
multiclass AddInstruction {
def v4i32: AVecInst<v4i32>;
def v16i8: AVecInst<v16i8>;
-
+ // v2i32 reuses the 128-bit vector add; only the type index differs.
+ def v2i32: AVecInst<v2i32>;
def r32: ARegInst<R32C>;
}
"sf\t$rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+// v2i32 vector subtract. 'sf' is subtract-from (rT = rB - rA), hence the
+// swapped operand order in the sub pattern, mirroring the v4i32 'sf'
+// definition above.
+def SF2vec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v2i32 VECREG:$rT), (sub (v2i32 VECREG:$rB), (v2i32 VECREG:$rA)))]>;
+
+
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* no pattern */]>;
+// v2i32 unsigned multiply (no selection pattern of its own): building block
+// emitted by the MPYv2i32 mul expansion below.
+def MPYUv2i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
def MPYUr16:
MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
[(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
[/* no pattern */]>;
+// v2i32 'mpyh' (no selection pattern of its own): combined with MPYUv2i32 by
+// the MPYv2i32 pattern below to form a full 32-bit element multiply.
+def MPYHv2i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
def MPYHr32:
MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
[/* no pattern */]>;
def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
(ORv4i32_i32 R32C:$rA)>;
+// Move an i32 into the preferred slot of a v2i32. The v4i32 OR-based copy is
+// reused — presumably safe because a v2i32 still occupies a full 128-bit
+// VECREG and only the preferred slot is significant; mirrors the v4i32
+// pattern above.
+def : Pat<(v2i32 (SPUprefslot2vec R32C:$rA)),
+ (ORv4i32_i32 R32C:$rA)>;
+
def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
(ORv2i64_i64 R64C:$rA)>;
def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)),
(ORi32_v4i32 VECREG:$rA)>;
+// Extract the preferred slot of a v2i32 as an i32, reusing the v4i32
+// OR-based copy (same register file; mirrors the v4i32 pattern above).
+def : Pat<(SPUvec2prefslot (v2i32 VECREG:$rA)),
+ (ORi32_v4i32 VECREG:$rA)>;
+
def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)),
(ORi64_v2i64 VECREG:$rA)>;
def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
def v4i32 : SHUFBVecInst<v4i32, v16i8>;
def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ // v2i32 shuffles reuse the 128-bit shufb, with both the byte (v16i8) and
+ // word (v4i32) mask forms, like the other element types in this multiclass.
+ def v2i32 : SHUFBVecInst<v2i32, v16i8>;
+ def v2i32_m32 : SHUFBVecInst<v2i32, v4i32>;
def v2i64 : SHUFBVecInst<v2i64, v16i8>;
def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
(FSMBIv8i16 0xcccc))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// v4i32, i32 multiply instruction sequence:
+// v4i32, v2i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
def MPYv4i32:
(v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
(v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
+// v2i32 element multiply: same mpyh/mpyh/mpyu add-tree as the MPYv4i32
+// expansion above (mpyu supplies the low halfword product, the two mpyh
+// terms add the cross high-halfword contributions — see MPYUr16's
+// zext-i16 pattern for the underlying 16x16 semantics).
+def MPYv2i32:
+ Pat<(mul (v2i32 VECREG:$rA), (v2i32 VECREG:$rB)),
+ (Av2i32
+ (v2i32 (Av2i32 (v2i32 (MPYHv2i32 VECREG:$rA, VECREG:$rB)),
+ (v2i32 (MPYHv2i32 VECREG:$rB, VECREG:$rA)))),
+ (v2i32 (MPYUv2i32 VECREG:$rA, VECREG:$rB)))>;
+
+
def MPYi32:
Pat<(mul R32C:$rA, R32C:$rB),
(Ar32
--- /dev/null
+;RUN: llc --march=cellspu %s -o - | FileCheck %s
+%vec = type <2 x i32>
+
+; A v2i32 argument is already in the return register, so the body should
+; reduce to just the return branch.
+define %vec @test_ret(%vec %param)
+{
+;CHECK: bi $lr
+ ret %vec %param
+}
+
+; v2i32 add: expect a single vector 'a' on $3 (the first-argument and
+; return-value register).
+define %vec @test_add(%vec %param)
+{
+;CHECK: a $3, $3, $3
+ %1 = add %vec %param, %param
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+; v2i32 sub against a constant splat. 'sf' is subtract-from (rT = rB - rA),
+; so the splatted constant is expected in $4 as the subtrahend.
+define %vec @test_sub(%vec %param)
+{
+;CHECK: sf $3, $4, $3
+ %1 = sub %vec %param, <i32 1, i32 1>
+
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+; v2i32 mul has no single SPU instruction; expect the mpyu/mpyh/add
+; expansion produced by the MPYv2i32 pattern.
+define %vec @test_mul(%vec %param)
+{
+;CHECK: mpyu
+;CHECK: mpyh
+;CHECK: a
+;CHECK: a $3
+ %1 = mul %vec %param, %param
+
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+define <2 x i32> @test_splat(i32 %param ) {
+;TODO insertelement transforms to a PREFSLOT2VEC, that transforms to the
+; somewhat redundant:
+;CHECK-NOT or $3, $3, $3
+; NOTE(review): the CHECK-NOT above has no trailing colon, so FileCheck
+; ignores it — it only documents the desired (or-free) output. Add the
+; colon to enforce it once the redundant copy is eliminated.
+;CHECK: lqa
+;CHECK: shufb
+ %sv = insertelement <1 x i32> undef, i32 %param, i32 0
+ %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer
+;CHECK: bi $lr
+ ret <2 x i32> %rv
+}
+
+; Extraction with a non-constant (undef) index takes the rotate/shuffle
+; path, so a shufb into the result register is expected.
+define i32 @test_extract() {
+;CHECK: shufb $3
+ %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1]
+;CHECK: bi $lr
+ ret i32 %rv
+}