From: Dan Gohman Date: Wed, 6 Jan 2016 00:43:06 +0000 (+0000) Subject: [SelectionDAGBuilder] Set NoUnsignedWrap for inbounds gep and load/store offsets. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=0fcad92ee35f13d1d7f4da235f11c684e95640b8;p=oota-llvm.git [SelectionDAGBuilder] Set NoUnsignedWrap for inbounds gep and load/store offsets. In an inbounds getelementptr, when an index produces a constant non-negative offset to add to the base, the add can be assumed to not have unsigned overflow. This relies on the assumption that addresses can't occupy more than half the address space, which isn't possible in C because it wouldn't be possible to represent the difference between the start of the object and one-past-the-end in a ptrdiff_t. Setting the NoUnsignedWrap flag is theoretically useful in general, and is specifically useful to the WebAssembly backend, since it permits stronger constant offset folding. Differential Revision: http://reviews.llvm.org/D15544 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256890 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0872d7a9a22..bc2405b952a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6843,9 +6843,13 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); + // The original load itself didn't wrap, so an offset within it doesn't. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, DL, PtrType)); + DAG.getConstant(PtrOff, DL, PtrType), + &Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 94550d60b36..e446a934554 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1329,12 +1329,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SmallVector Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i], - getCurSDLoc())); + getCurSDLoc()), + &Flags); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), @@ -2994,8 +3000,15 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); + + // In an inbouds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (int64_t(Offset) >= 0 && cast(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, - DAG.getConstant(Offset, dl, N.getValueType())); + DAG.getConstant(Offset, dl, N.getValueType()), &Flags); } Ty = StTy->getElementType(Field); @@ -3020,7 +3033,14 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue OffsVal = VectorWidth ? DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : DAG.getConstant(Offs, dl, PtrTy); - N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); + + // In an inbouds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (Offs.isNonNegative() && cast(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); continue; } @@ -3092,10 +3112,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { Align = 0; // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl)); + DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); // Mask out the low bits for alignment purposes. AllocSize = DAG.getNode(ISD::AND, dl, @@ -3168,6 +3191,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SmallVector Values(NumValues); SmallVector Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); @@ -3188,7 +3216,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } SDValue A = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT)); + DAG.getConstant(Offsets[i], dl, PtrVT), + &Flags); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, AAInfo, @@ -3243,6 +3272,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { AAMDNodes AAInfo; I.getAAMetadata(AAInfo); + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. @@ -3253,7 +3287,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { ChainI = 0; } SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT)); + DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); SDValue St = DAG.getStore(Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), @@ -7202,10 +7236,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ReturnValues.resize(NumValues); SmallVector Chains(NumValues); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, - PtrVT)); + PtrVT), &Flags); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), diff --git a/test/CodeGen/WebAssembly/offset.ll b/test/CodeGen/WebAssembly/offset.ll index 75a0bc9ab6c..901801d7dbb 100644 --- a/test/CodeGen/WebAssembly/offset.ll +++ b/test/CodeGen/WebAssembly/offset.ll @@ -17,6 +17,28 @@ define i32 @load_i32_with_folded_offset(i32* %p) { ret i32 %t } +; With an inbounds gep, we can fold an offset. + +; CHECK-LABEL: load_i32_with_folded_gep_offset: +; CHECK: i32.load $push0=, 24($0){{$}} +define i32 @load_i32_with_folded_gep_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + %t = load i32, i32* %s + ret i32 %t +} + +; We can't fold a negative offset though, even with an inbounds gep. + +; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 -6 + %t = load i32, i32* %s + ret i32 %t +} + ; Without nuw, and even with nsw, we can't fold an offset. ; CHECK-LABEL: load_i32_with_unfolded_offset: @@ -31,6 +53,18 @@ define i32 @load_i32_with_unfolded_offset(i32* %p) { ret i32 %t } +; Without inbounds, we can't fold a gep offset. + +; CHECK-LABEL: load_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.load $push2=, 0($pop1){{$}} +define i32 @load_i32_with_unfolded_gep_offset(i32* %p) { + %s = getelementptr i32, i32* %p, i32 6 + %t = load i32, i32* %s + ret i32 %t +} + ; Same as above but with i64. ; CHECK-LABEL: load_i64_with_folded_offset: @@ -45,6 +79,28 @@ define i64 @load_i64_with_folded_offset(i64* %p) { ; Same as above but with i64. +; CHECK-LABEL: load_i64_with_folded_gep_offset: +; CHECK: i64.load $push0=, 24($0){{$}} +define i64 @load_i64_with_folded_gep_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 3 + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 -3 + %t = load i64, i64* %s + ret i64 %t +} + +; Same as above but with i64. + ; CHECK-LABEL: load_i64_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} ; CHECK: i32.add $push1=, $0, $pop0{{$}} @@ -57,6 +113,18 @@ define i64 @load_i64_with_unfolded_offset(i64* %p) { ret i64 %t } +; Same as above but with i64. + +; CHECK-LABEL: load_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.load $push2=, 0($pop1){{$}} +define i64 @load_i64_with_unfolded_gep_offset(i64* %p) { + %s = getelementptr i64, i64* %p, i32 3 + %t = load i64, i64* %s + ret i64 %t +} + ; Same as above but with store. ; CHECK-LABEL: store_i32_with_folded_offset: @@ -71,6 +139,28 @@ define void @store_i32_with_folded_offset(i32* %p) { ; Same as above but with store. +; CHECK-LABEL: store_i32_with_folded_gep_offset: +; CHECK: i32.store $discard=, 24($0), $pop0{{$}} +define void @store_i32_with_folded_gep_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 6 + store i32 0, i32* %s + ret void +} + +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) { + %s = getelementptr inbounds i32, i32* %p, i32 -6 + store i32 0, i32* %s + ret void +} + +; Same as above but with store. + ; CHECK-LABEL: store_i32_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} ; CHECK: i32.add $push1=, $0, $pop0{{$}} @@ -83,6 +173,18 @@ define void @store_i32_with_unfolded_offset(i32* %p) { ret void } +; Same as above but with store. + +; CHECK-LABEL: store_i32_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i32_with_unfolded_gep_offset(i32* %p) { + %s = getelementptr i32, i32* %p, i32 6 + store i32 0, i32* %s + ret void +} + ; Same as above but with store with i64. ; CHECK-LABEL: store_i64_with_folded_offset: @@ -97,6 +199,28 @@ define void @store_i64_with_folded_offset(i64* %p) { ; Same as above but with store with i64. +; CHECK-LABEL: store_i64_with_folded_gep_offset: +; CHECK: i64.store $discard=, 24($0), $pop0{{$}} +define void @store_i64_with_folded_gep_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 3 + store i64 0, i64* %s + ret void +} + +; Same as above but with store with i64. + +; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset: +; CHECK: i32.const $push0=, -24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) { + %s = getelementptr inbounds i64, i64* %p, i32 -3 + store i64 0, i64* %s + ret void +} + +; Same as above but with store with i64. + ; CHECK-LABEL: store_i64_with_unfolded_offset: ; CHECK: i32.const $push0=, 24{{$}} ; CHECK: i32.add $push1=, $0, $pop0{{$}} @@ -109,6 +233,18 @@ define void @store_i64_with_unfolded_offset(i64* %p) { ret void } +; Same as above but with store with i64. + +; CHECK-LABEL: store_i64_with_unfolded_gep_offset: +; CHECK: i32.const $push0=, 24{{$}} +; CHECK: i32.add $push1=, $0, $pop0{{$}} +; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}} +define void @store_i64_with_unfolded_gep_offset(i64* %p) { + %s = getelementptr i64, i64* %p, i32 3 + store i64 0, i64* %s + ret void +} + ; When loading from a fixed address, materialize a zero. ; CHECK-LABEL: load_i32_from_numeric_address @@ -159,6 +295,17 @@ define i32 @load_i8_s_with_folded_offset(i8* %p) { ret i32 %u } +; Fold a gep offset into a sign-extending load. + +; CHECK-LABEL: load_i8_s_with_folded_gep_offset: +; CHECK: i32.load8_s $push0=, 24($0){{$}} +define i32 @load_i8_s_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = load i8, i8* %s + %u = sext i8 %t to i32 + ret i32 %u +} + ; Fold an offset into a zero-extending load. ; CHECK-LABEL: load_i8_u_with_folded_offset: @@ -172,6 +319,17 @@ define i32 @load_i8_u_with_folded_offset(i8* %p) { ret i32 %u } +; Fold a gep offset into a zero-extending load. + +; CHECK-LABEL: load_i8_u_with_folded_gep_offset: +; CHECK: i32.load8_u $push0=, 24($0){{$}} +define i32 @load_i8_u_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + %t = load i8, i8* %s + %u = zext i8 %t to i32 + ret i32 %u +} + ; Fold an offset into a truncating store. ; CHECK-LABEL: store_i8_with_folded_offset: @@ -183,3 +341,43 @@ define void @store_i8_with_folded_offset(i8* %p) { store i8 0, i8* %s ret void } + +; Fold a gep offset into a truncating store. + +; CHECK-LABEL: store_i8_with_folded_gep_offset: +; CHECK: i32.store8 $discard=, 24($0), $pop0{{$}} +define void @store_i8_with_folded_gep_offset(i8* %p) { + %s = getelementptr inbounds i8, i8* %p, i32 24 + store i8 0, i8* %s + ret void +} + +; Fold the offsets when lowering aggregate loads and stores. + +; CHECK-LABEL: aggregate_load_store: +; CHECK: i32.load $2=, 0($0){{$}} +; CHECK: i32.load $3=, 4($0){{$}} +; CHECK: i32.load $4=, 8($0){{$}} +; CHECK: i32.load $push0=, 12($0){{$}} +; CHECK: i32.store $discard=, 12($1), $pop0{{$}} +; CHECK: i32.store $discard=, 8($1), $4{{$}} +; CHECK: i32.store $discard=, 4($1), $3{{$}} +; CHECK: i32.store $discard=, 0($1), $2{{$}} +define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q) { + ; volatile so that things stay in order for the tests above + %t = load volatile {i32,i32,i32,i32}, {i32, i32,i32,i32}* %p + store volatile {i32,i32,i32,i32} %t, {i32, i32,i32,i32}* %q + ret void +} + +; Fold the offsets when lowering aggregate return values. + +; CHECK-LABEL: aggregate_return: +; CHECK: i32.const $push0=, 0{{$}} +; CHECK: i32.store $push1=, 12($0), $pop0{{$}} +; CHECK: i32.store $push2=, 8($0), $pop1{{$}} +; CHECK: i32.store $push3=, 4($0), $pop2{{$}} +; CHECK: i32.store $discard=, 0($0), $pop3{{$}} +define {i32,i32,i32,i32} @aggregate_return() { + ret {i32,i32,i32,i32} zeroinitializer +}