ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
SmallVector<SDValue, 4> Chains(NumValues);
for (unsigned i = 0; i != NumValues; ++i) {
SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
RetPtr.getValueType(), RetPtr,
DAG.getIntPtrConstant(Offsets[i],
- getCurSDLoc()));
+ getCurSDLoc()),
+ &Flags);
Chains[i] =
DAG.getStore(Chain, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
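
The nuw flag set here only helps if an address matcher later queries it. As a minimal sketch of such a consumer, assuming the LLVM 3.8-era BinaryWithFlagsSDNode / SDNodeFlags API (the helper name and logic are hypothetical, not the actual WebAssembly matcher):

    // Hypothetical helper: is Addr an ADD known not to wrap, so that a
    // constant operand could be folded into a load/store offset field?
    static bool isNoWrapAdd(SDValue Addr) {
      if (Addr.getOpcode() != ISD::ADD)
        return false;
      if (const auto *Flagged = dyn_cast<BinaryWithFlagsSDNode>(Addr.getNode()))
        return Flagged->Flags.hasNoUnsignedWrap();
      return false;
    }
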
if (Field) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
- DAG.getConstant(Offset, dl, N.getValueType()));
+ DAG.getConstant(Offset, dl, N.getValueType()), &Flags);
}
Ty = StTy->getElementType(Field);
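
For concreteness, a worked instance of the check above with a hypothetical layout: in {i8, i64} with natural alignment, field 1 lives at byte offset 8, which stays nonnegative when reinterpreted as signed, so an inbounds GEP to it gets the nuw flag:

    // Illustrative values mirroring the predicate above.
    uint64_t Offset = 8;     // getElementOffset(1) for {i8, i64}
    bool IsInBounds = true;  // the gep carried the inbounds keyword
    bool NUW = int64_t(Offset) >= 0 && IsInBounds; // true: safe to set nuw
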
SDValue OffsVal = VectorWidth ?
DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
DAG.getConstant(Offs, dl, PtrTy);
- N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal);
+
+ // In an inbounds GEP with an offset that is nonnegative even when
+ // interpreted as signed, assume there is no unsigned overflow.
+ SDNodeFlags Flags;
+ if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
+ Flags.setNoUnsignedWrap(true);
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags);
continue;
}
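
The signed-nonnegative test matters because a negative GEP index becomes a huge unsigned offset, and adding it wraps the unsigned address computation even when the gep is inbounds; this is why the *negative_offset tests below still emit an explicit i32.add. A self-contained illustration with an arbitrary base value:

    #include <cstdint>
    #include <cstdio>
    int main() {
      uint32_t Base = 0x1000;                    // an arbitrary pointer value
      uint32_t Off = static_cast<uint32_t>(-24); // gep index -6 on i32: 0xffffffe8
      std::printf("%#x\n", Base + Off);          // prints 0xfe8: wrapped past zero
    }
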
Align = 0;
// Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size.
+ // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl,
AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(StackAlign - 1, dl));
+ DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl,
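
The ADD/AND pair above is the standard power-of-two align-up, (Size + Align - 1) & ~(Align - 1). A self-contained sketch with illustrative numbers (the helper name is ours):

    #include <cassert>
    #include <cstdint>
    // Round Size up to a multiple of Align, which must be a power of two.
    static uint64_t alignUp(uint64_t Size, uint64_t Align) {
      return (Size + Align - 1) & ~(Align - 1);
    }
    int main() {
      assert(alignUp(40, 16) == 48); // (40 + 15) & ~15 == 48
      assert(alignUp(48, 16) == 48); // already a multiple of 16: unchanged
    }
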
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
+ // An aggregate load cannot wrap around the address space, so offsets to its
+ // parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT));
+ DAG.getConstant(Offsets[i], dl, PtrVT),
+ &Flags);
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
isNonTemporal, isInvariant, Alignment, AAInfo,
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
+ // An aggregate store cannot wrap around the address space, so offsets to its
+ // parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
ChainI = 0;
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
- DAG.getConstant(Offsets[i], dl, PtrVT));
+ DAG.getConstant(Offsets[i], dl, PtrVT), &Flags);
SDValue St = DAG.getStore(Root, dl,
SDValue(Src.getNode(), Src.getResNo() + i),
Add, MachinePointerInfo(PtrV, Offsets[i]),
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDNodeFlags Flags;
+ Flags.setNoUnsignedWrap(true);
+
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
- PtrVT));
+ PtrVT), &Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
ret i32 %t
}
+; With an inbounds gep, we can fold an offset.
+
+; CHECK-LABEL: load_i32_with_folded_gep_offset:
+; CHECK: i32.load $push0=, 24($0){{$}}
+define i32 @load_i32_with_folded_gep_offset(i32* %p) {
+ %s = getelementptr inbounds i32, i32* %p, i32 6
+ %t = load i32, i32* %s
+ ret i32 %t
+}
+
+; We can't fold a negative offset though, even with an inbounds gep.
+
+; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.load $push2=, 0($pop1){{$}}
+define i32 @load_i32_with_unfolded_gep_negative_offset(i32* %p) {
+ %s = getelementptr inbounds i32, i32* %p, i32 -6
+ %t = load i32, i32* %s
+ ret i32 %t
+}
+
; Without nuw, and even with nsw, we can't fold an offset.
; CHECK-LABEL: load_i32_with_unfolded_offset:
ret i32 %t
}
+; Without inbounds, we can't fold a gep offset.
+
+; CHECK-LABEL: load_i32_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.load $push2=, 0($pop1){{$}}
+define i32 @load_i32_with_unfolded_gep_offset(i32* %p) {
+ %s = getelementptr i32, i32* %p, i32 6
+ %t = load i32, i32* %s
+ ret i32 %t
+}
+
; Same as above but with i64.
; CHECK-LABEL: load_i64_with_folded_offset:
; Same as above but with i64.
+; CHECK-LABEL: load_i64_with_folded_gep_offset:
+; CHECK: i64.load $push0=, 24($0){{$}}
+define i64 @load_i64_with_folded_gep_offset(i64* %p) {
+ %s = getelementptr inbounds i64, i64* %p, i32 3
+ %t = load i64, i64* %s
+ ret i64 %t
+}
+
+; Same as above but with i64.
+
+; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.load $push2=, 0($pop1){{$}}
+define i64 @load_i64_with_unfolded_gep_negative_offset(i64* %p) {
+ %s = getelementptr inbounds i64, i64* %p, i32 -3
+ %t = load i64, i64* %s
+ ret i64 %t
+}
+
+; Same as above but with i64.
+
; CHECK-LABEL: load_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
ret i64 %t
}
+; Same as above but with i64.
+
+; CHECK-LABEL: load_i64_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.load $push2=, 0($pop1){{$}}
+define i64 @load_i64_with_unfolded_gep_offset(i64* %p) {
+ %s = getelementptr i64, i64* %p, i32 3
+ %t = load i64, i64* %s
+ ret i64 %t
+}
+
; Same as above but with store.
; CHECK-LABEL: store_i32_with_folded_offset:
; Same as above but with store.
+; CHECK-LABEL: store_i32_with_folded_gep_offset:
+; CHECK: i32.store $discard=, 24($0), $pop0{{$}}
+define void @store_i32_with_folded_gep_offset(i32* %p) {
+ %s = getelementptr inbounds i32, i32* %p, i32 6
+ store i32 0, i32* %s
+ ret void
+}
+
+; Same as above but with store.
+
+; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i32_with_unfolded_gep_negative_offset(i32* %p) {
+ %s = getelementptr inbounds i32, i32* %p, i32 -6
+ store i32 0, i32* %s
+ ret void
+}
+
+; Same as above but with store.
+
; CHECK-LABEL: store_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
ret void
}
+; Same as above but with store.
+
+; CHECK-LABEL: store_i32_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i32.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i32_with_unfolded_gep_offset(i32* %p) {
+ %s = getelementptr i32, i32* %p, i32 6
+ store i32 0, i32* %s
+ ret void
+}
+
; Same as above but with store with i64.
; CHECK-LABEL: store_i64_with_folded_offset:
; Same as above but with store with i64.
+; CHECK-LABEL: store_i64_with_folded_gep_offset:
+; CHECK: i64.store $discard=, 24($0), $pop0{{$}}
+define void @store_i64_with_folded_gep_offset(i64* %p) {
+ %s = getelementptr inbounds i64, i64* %p, i32 3
+ store i64 0, i64* %s
+ ret void
+}
+
+; Same as above but with store with i64.
+
+; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset:
+; CHECK: i32.const $push0=, -24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i64_with_unfolded_gep_negative_offset(i64* %p) {
+ %s = getelementptr inbounds i64, i64* %p, i32 -3
+ store i64 0, i64* %s
+ ret void
+}
+
+; Same as above but with store with i64.
+
; CHECK-LABEL: store_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
ret void
}
+; Same as above but with store with i64.
+
+; CHECK-LABEL: store_i64_with_unfolded_gep_offset:
+; CHECK: i32.const $push0=, 24{{$}}
+; CHECK: i32.add $push1=, $0, $pop0{{$}}
+; CHECK: i64.store $discard=, 0($pop1), $pop2{{$}}
+define void @store_i64_with_unfolded_gep_offset(i64* %p) {
+ %s = getelementptr i64, i64* %p, i32 3
+ store i64 0, i64* %s
+ ret void
+}
+
; When loading from a fixed address, materialize a zero.
; CHECK-LABEL: load_i32_from_numeric_address
ret i32 %u
}
+; Fold a gep offset into a sign-extending load.
+
+; CHECK-LABEL: load_i8_s_with_folded_gep_offset:
+; CHECK: i32.load8_s $push0=, 24($0){{$}}
+define i32 @load_i8_s_with_folded_gep_offset(i8* %p) {
+ %s = getelementptr inbounds i8, i8* %p, i32 24
+ %t = load i8, i8* %s
+ %u = sext i8 %t to i32
+ ret i32 %u
+}
+
; Fold an offset into a zero-extending load.
; CHECK-LABEL: load_i8_u_with_folded_offset:
ret i32 %u
}
+; Fold a gep offset into a zero-extending load.
+
+; CHECK-LABEL: load_i8_u_with_folded_gep_offset:
+; CHECK: i32.load8_u $push0=, 24($0){{$}}
+define i32 @load_i8_u_with_folded_gep_offset(i8* %p) {
+ %s = getelementptr inbounds i8, i8* %p, i32 24
+ %t = load i8, i8* %s
+ %u = zext i8 %t to i32
+ ret i32 %u
+}
+
; Fold an offset into a truncating store.
; CHECK-LABEL: store_i8_with_folded_offset:
store i8 0, i8* %s
ret void
}
+
+; Fold a gep offset into a truncating store.
+
+; CHECK-LABEL: store_i8_with_folded_gep_offset:
+; CHECK: i32.store8 $discard=, 24($0), $pop0{{$}}
+define void @store_i8_with_folded_gep_offset(i8* %p) {
+ %s = getelementptr inbounds i8, i8* %p, i32 24
+ store i8 0, i8* %s
+ ret void
+}
+
+; Fold the offsets when lowering aggregate loads and stores.
+
+; CHECK-LABEL: aggregate_load_store:
+; CHECK: i32.load $2=, 0($0){{$}}
+; CHECK: i32.load $3=, 4($0){{$}}
+; CHECK: i32.load $4=, 8($0){{$}}
+; CHECK: i32.load $push0=, 12($0){{$}}
+; CHECK: i32.store $discard=, 12($1), $pop0{{$}}
+; CHECK: i32.store $discard=, 8($1), $4{{$}}
+; CHECK: i32.store $discard=, 4($1), $3{{$}}
+; CHECK: i32.store $discard=, 0($1), $2{{$}}
+define void @aggregate_load_store({i32,i32,i32,i32}* %p, {i32,i32,i32,i32}* %q) {
+ ; volatile so the loads and stores stay in order for the CHECK lines above
+ %t = load volatile {i32,i32,i32,i32}, {i32,i32,i32,i32}* %p
+ store volatile {i32,i32,i32,i32} %t, {i32,i32,i32,i32}* %q
+ ret void
+}
+
+; Fold the offsets when lowering aggregate return values.
+
+; CHECK-LABEL: aggregate_return:
+; CHECK: i32.const $push0=, 0{{$}}
+; CHECK: i32.store $push1=, 12($0), $pop0{{$}}
+; CHECK: i32.store $push2=, 8($0), $pop1{{$}}
+; CHECK: i32.store $push3=, 4($0), $pop2{{$}}
+; CHECK: i32.store $discard=, 0($0), $pop3{{$}}
+define {i32,i32,i32,i32} @aggregate_return() {
+ ret {i32,i32,i32,i32} zeroinitializer
+}