setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
}
- setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i8 , Expand);
if (Subtarget->hasLZCNT()) {
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
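+ // LZCNT produces a defined result (the operand bit width) for a zero input,
+ // so plain CTLZ is directly usable and the ZERO_UNDEF variants can simply
+ // expand back to it.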
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
- if (Subtarget->is64Bit())
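+ // Without LZCNT, custom lower the ZERO_UNDEF form as well: it maps to a
+ // bare BSR with no zero check, since a zero input is undefined anyway.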
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+ }
}
if (Subtarget->hasPOPCNT()) {
return Op;
}
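+// Lower CTLZ_ZERO_UNDEF to a bare BSR plus XOR. The operand is guaranteed
+// nonzero, so the CMOV zero check emitted by the plain CTLZ lowering is not
+// needed here.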
+SDValue X86TargetLowering::LowerCTLZ_ZERO_UNDEF(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ EVT OpVT = VT;
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+
+ Op = Op.getOperand(0);
+ if (VT == MVT::i8) {
+ // Zero-extend to i32 since there is no i8 bsr.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
+ }
+
+ // Issue a bsr (scan bits in reverse).
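+ // The second result type is the EFLAGS value that BSR defines; it goes
+ // unused here because no zero check is required.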
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // XOR the bit index with NumBits-1 to turn it into a leading-zero count:
+ // for a power-of-two bit width, (NumBits-1) - Index == Index ^ (NumBits-1).
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
+
SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT OpVT = VT;
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SRA:
def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
-def : Pat<(ctlz_zero_undef GR16:$src), (XOR16ri (BSR16rr GR16:$src), 15)>;
-def : Pat<(ctlz_zero_undef GR32:$src), (XOR32ri (BSR32rr GR32:$src), 31)>;
-def : Pat<(ctlz_zero_undef GR64:$src), (XOR64ri8 (BSR64rr GR64:$src), 63)>;
-def : Pat<(ctlz_zero_undef (loadi16 addr:$src)),
- (XOR16ri (BSR16rm addr:$src), 15)>;
-def : Pat<(ctlz_zero_undef (loadi32 addr:$src)),
- (XOR32ri (BSR32rm addr:$src), 31)>;
-def : Pat<(ctlz_zero_undef (loadi64 addr:$src)),
- (XOR64ri8 (BSR64rm addr:$src), 63)>;
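+// The ctlz_zero_undef patterns above are dead now that the node is custom
+// lowered (LowerCTLZ_ZERO_UNDEF); emitting the XOR as a DAG node lets the
+// combiner cancel it against a matching xor already in the DAG (see the
+// t6/t7 tests).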
; CHECK: t3:
; CHECK: bsrw
; CHECK-NOT: cmov
-; CHECK: xorw $15,
+; CHECK: xorl $15,
; CHECK: ret
}
%tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false)
ret i32 %tmp1
}
+
+define i32 @t6(i32 %n) nounwind {
+entry:
+; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
+; the index of the most significant set bit, which is what 'bsr' computes
+; natively.
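+; For example, %n = 0x00010000 has its highest set bit at index 16: the true
+; ctlz is 15, and 15 xor 31 = 16, exactly the value a bare bsrl leaves behind.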
+; CHECK: t6:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+ %bsr = xor i32 %ctlz, 31
+ ret i32 %bsr
+}
+
+define i32 @t7(i32 %n) nounwind {
+entry:
+; Same as t6, but ensure this happens even when there is a potential zero.
+; The xor in the IR cancels against the xor inside the custom CTLZ lowering,
+; though a cmov still survives to produce a defined result for a zero input.
+; CHECK: t7:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+ %ctlz = tail call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+ %bsr = xor i32 %ctlz, 31
+ ret i32 %bsr
+}