case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l:
case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy:
case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen:
+ case LibFunc::memchr:
return true;
}
return false;
return std::make_pair(SDValue(), SDValue());
}
+ /// EmitTargetCodeForMemchr - Emit target-specific code that performs a
+ /// memchr, in cases where that is faster than a libcall. The first
+ /// returned SDValue is the result of the memchr and the second is
+ /// the chain. Both SDValues can be null if a normal libcall should
+ /// be used.
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const {
+ return std::make_pair(SDValue(), SDValue());
+ }
+
/// EmitTargetCodeForStrcpy - Emit target-specific code that performs a
/// strcpy or stpcpy, in cases where that is faster than a libcall.
/// The first returned SDValue is the result of the copy (the start
return false;
}
+/// visitMemChrCall -- See if we can lower a memchr call into an optimized
+/// form. If so, return true and lower it, otherwise return false and it
+/// will be lowered like a normal call.
+bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
+ // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ const Value *Src = I.getArgOperand(0);
+ const Value *Char = I.getArgOperand(1);
+ const Value *Length = I.getArgOperand(2);
+ if (!Src->getType()->isPointerTy() ||
+ !Char->getType()->isIntegerTy() ||
+ !Length->getType()->isIntegerTy() ||
+ !I.getType()->isPointerTy())
+ return false;
+
+ const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
+ std::pair<SDValue, SDValue> Res =
+ TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
+ getValue(Src), getValue(Char), getValue(Length),
+ MachinePointerInfo(Src));
+ if (Res.first.getNode()) {
+ setValue(&I, Res.first);
+ PendingLoads.push_back(Res.second);
+ return true;
+ }
+
+ return false;
+}
+
/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
/// optimized form. If so, return true and lower it, otherwise return false
/// and it will be lowered like a normal call.
if (visitMemCmpCall(I))
return;
break;
+ case LibFunc::memchr:
+ if (visitMemChrCall(I))
+ return;
+ break;
case LibFunc::strcpy:
if (visitStrCpyCall(I, false))
return;
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitMemChrCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
bool visitStrCmpCall(const CallInst &I);
bool visitStrLenCall(const CallInst &I);
const unsigned CCMASK_CS_NE = CCMASK_1;
const unsigned CCMASK_CS = CCMASK_0 | CCMASK_1;
+ // Condition-code mask assignments for a completed SRST loop.
+ const unsigned CCMASK_SRST_FOUND = CCMASK_1;
+ const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2;
+ const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2;
+
// Return true if Val fits an LLILL operand.
static inline bool isImmLL(uint64_t Val) {
return (Val & ~0x000000000000ffffULL) == 0;
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
// StartMBB:
- // R0W = %CharReg
// # fall through to LoopMMB
- BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
// %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
// %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
+ // R0W = %CharReg
// %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0W
// JO LoopMBB
// # fall through to DoneMMB
+ //
+ // The load of R0W can be hoisted by post-RA LICM.
MBB = LoopMBB;
- MBB->addLiveIn(SystemZ::R0W);
BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
.addReg(Start1Reg).addMBB(StartMBB)
BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
.addReg(Start2Reg).addMBB(StartMBB)
.addReg(End2Reg).addMBB(LoopMBB);
+ BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg);
BuildMI(MBB, DL, TII->get(Opcode))
.addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
.addReg(This1Reg).addReg(This2Reg);
return std::make_pair(SDValue(), SDValue());
}
+std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
+EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const {
+ // Use SRST to find the character. End is its address on success.
+ EVT PtrVT = Src.getValueType();
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue);
+ Length = DAG.getZExtOrTrunc(Length, DL, PtrVT);
+ Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32);
+ Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char,
+ DAG.getConstant(255, MVT::i32));
+ SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length);
+ SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain,
+ Limit, Src, Char);
+ Chain = End.getValue(1);
+ SDValue Glue = End.getValue(2);
+
+ // Now select between End and null, depending on whether the character
+ // was found.
+ SmallVector<SDValue, 5> Ops;
+ Ops.push_back(End);
+ Ops.push_back(DAG.getConstant(0, PtrVT));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST, MVT::i32));
+ Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
+ Ops.push_back(Glue);
+ VTs = DAG.getVTList(PtrVT, MVT::Glue);
+ End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+ return std::make_pair(End, Chain);
+}
+
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dest, SDValue Src,
MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
+ virtual std::pair<SDValue, SDValue>
+ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
+ SDValue Src, SDValue Char, SDValue Length,
+ MachinePointerInfo SrcPtrInfo) const LLVM_OVERRIDE;
+
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Dest, SDValue Src,
--- /dev/null
+; Test memchr using SRST, with a weird but usable prototype.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i8 *@memchr(i8 *%src, i16 %char, i32 %len)
+
+; Test a simple forwarded call.
+define i8 *@f1(i8 *%src, i16 %char, i32 %len) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lgr [[REG:%r[1-5]]], %r2
+; CHECK-DAG: algfr %r2, %r4
+; CHECK-DAG: llcr %r0, %r3
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: srst %r2, [[REG]]
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK: jl {{\.L.*}}
+; CHECK: lghi %r2, 0
+; CHECK: br %r14
+ %res = call i8 *@memchr(i8 *%src, i16 %char, i32 %len)
+ ret i8 *%res
+}
--- /dev/null
+; Test memchr using SRST, with the correct prototype.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare i8 *@memchr(i8 *%src, i32 %char, i64 %len)
+
+; Test a simple forwarded call.
+define i8 *@f1(i64 %len, i8 *%src, i32 %char) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: agr %r2, %r3
+; CHECK-DAG: llcr %r0, %r4
+; CHECK: [[LABEL:\.[^:]*]]:
+; CHECK: srst %r2, %r3
+; CHECK-NEXT: jo [[LABEL]]
+; CHECK: jl {{\.L.*}}
+; CHECK: lghi %r2, 0
+; CHECK: br %r14
+ %res = call i8 *@memchr(i8 *%src, i32 %char, i64 %len)
+ ret i8 *%res
+}
+
+; Test a doubled call with no use of %r0 in between. There should be a
+; single load of %r0.
+define i8 *@f2(i8 *%src, i8 *%charptr, i64 %len) {
+; CHECK-LABEL: f2:
+; CHECK: llc %r0, 0(%r3)
+; CHECK-NOT: %r0
+; CHECK: srst [[RES1:%r[1-5]]], %r2
+; CHECK-NOT: %r0
+; CHECK: srst %r2, [[RES1]]
+; CHECK: br %r14
+ %char = load volatile i8 *%charptr
+ %charext = zext i8 %char to i32
+ %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
+ %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len)
+ ret i8 *%res2
+}
+
+; Test a doubled call with a use of %r0 in between. %r0 must be loaded
+; for each loop.
+define i8 *@f3(i8 *%src, i8 *%charptr, i64 %len) {
+; CHECK-LABEL: f3:
+; CHECK: llc [[CHAR:%r[1-5]]], 0(%r3)
+; CHECK: lr %r0, [[CHAR]]
+; CHECK: srst [[RES1:%r[1-5]]], %r2
+; CHECK: lhi %r0, 0
+; CHECK: blah %r0
+; CHECK: lr %r0, [[CHAR]]
+; CHECK: srst %r2, [[RES1]]
+; CHECK: br %r14
+ %char = load volatile i8 *%charptr
+ %charext = zext i8 %char to i32
+ %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
+ call void asm sideeffect "blah $0", "{r0}" (i32 0)
+ %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len)
+ ret i8 *%res2
+}