Summary:
Previously SelectionDAGBuilder asserted that the pointer operands of
memcpy / memset / memmove intrinsics are in address space < 256. This assert
implicitly assumed the X86 backend, where all address spaces < 256 are
equivalent to address space 0 from the code generator's point of view. On some
targets (R600 and NVPTX) several address spaces < 256 have a target-defined
meaning, so this assert made little sense for these targets.
This patch removes this wrong assertion and adds extra checks before lowering
these intrinsics to library calls. If a pointer operand can't be casted to
address space 0 without changing semantics, a fatal error is reported to the
user.
The new behavior should be valid for all targets that give address spaces != 0
a target-specified meaning (NVPTX, R600, X86). NVPTX lowers big or
variable-sized memory intrinsics before SelectionDAG construction. All other
memory intrinsics are inlined (the threshold is set very high for this target).
R600 doesn't support memcpy / memset / memmove library calls (previously the
illegal emission of a call to such library function triggered an error
somewhere in the code generator). X86 now emits inline loads and stores for
address spaces 256 and 257 up to the same threshold that is used for address
space 0 and reports a fatal error otherwise.
I call this a "partial fix" because there are still cases that can't be
lowered. A fatal error is reported in these cases.
Reviewers: arsenm, theraven, compnerd, hfinkel
Subscribers: hfinkel, llvm-commits, alex
Differential Revision: http://reviews.llvm.org/D7241
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255441
91177308-0d34-0410-b5e6-
96231b3b80d8
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
+static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI,
+ unsigned AS) {
+ // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all
+ // pointer operands can be losslessly bitcasted to pointers of address space 0
+ if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) {
+ report_fatal_error("cannot lower memory intrinsic in address space " +
+ Twine(AS));
+ }
+}
+
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
true, DstPtrInfo, SrcPtrInfo);
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
// memcpy is not guaranteed to be safe. libc memcpys aren't required to
// respect volatile, so they may do things like read or write memory
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+ checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace());
+
// FIXME: If the memmove is volatile, lowering it to plain libc memmove may
// not be safe. See memcpy above for more details.
return Result;
}
+ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
+
// Emit a library call.
Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
case Intrinsic::longjmp:
return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
return nullptr;
}
case Intrinsic::memset: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
return nullptr;
}
case Intrinsic::memmove: {
- // FIXME: this definition of "user defined address space" is x86-specific
- // Assert for address < 256 since we support only user defined address
- // spaces.
- assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
- < 256 &&
- cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
- < 256 &&
- "Unknown address space");
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=DARWIN
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* nocapture, i8 addrspace(256)* nocapture, i64, i32, i1) nounwind
; Variable memcpy's should lower to calls.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 17, i32 0, i1 false)
ret void
}
+
+; Memcpys from / to address space 256 should be lowered to appropriate loads /
+; stores if small enough.
+define void @addrspace256(i8 addrspace(256)* %a, i8 addrspace(256)* %b) nounwind {
+ tail call void @llvm.memcpy.p256i8.p256i8.i64(i8 addrspace(256)* %a, i8 addrspace(256)* %b, i64 16, i32 8, i1 false)
+ ret void
+; LINUX-LABEL: addrspace256:
+; LINUX: movq %gs:
+; LINUX: movq %gs:
+; LINUX: movq {{.*}}, %gs:
+; LINUX: movq {{.*}}, %gs:
+}