From c77dec905a58b2308e5efe3a7d1635e4965ccff3 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Fri, 6 Jun 2014 21:52:55 +0000 Subject: [PATCH] InstCombine: Canonicalize addrspacecast between different element types addrspacecast X addrspace(M)* to Y addrspace(N)* --> bitcast X addrspace(M)* to Y addrspace(M)* addrspacecast Y addrspace(M)* to Y addrspace(N)* Update all affected tests and add several new tests in addrspacecast.ll. This patch is based on http://reviews.llvm.org/D2186 (authored by Matt Arsenault) with fixes and more tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210375 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/IR/Instructions.cpp | 18 ++--- .../InstCombine/InstCombineCasts.cpp | 24 +++++- test/Transforms/InstCombine/addrspacecast.ll | 80 ++++++++++++++++++- test/Transforms/InstCombine/getelementptr.ll | 17 +++- .../InstCombine/memcpy-from-global.ll | 3 +- 5 files changed, 125 insertions(+), 17 deletions(-) diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 592cdd85e87..051d63f7fca 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -2331,18 +2331,12 @@ unsigned CastInst::isEliminableCastPair( // Allowed, use first cast's opcode return firstOp; case 14: - // FIXME: this state can be merged with (2), but the following assert - // is useful to check the correcteness of the sequence due to semantic - // change of bitcast. - assert( - SrcTy->isPtrOrPtrVectorTy() && - MidTy->isPtrOrPtrVectorTy() && - DstTy->isPtrOrPtrVectorTy() && - SrcTy->getPointerAddressSpace() == MidTy->getPointerAddressSpace() && - MidTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace() && - "Illegal bitcast, addrspacecast sequence!"); - // Allowed, use second cast's opcode - return secondOp; + // bitcast, addrspacecast -> addrspacecast if the element type of + // bitcast's source is the same as that of addrspacecast's destination. 
+ if (SrcTy->getPointerElementType() == DstTy->getPointerElementType()) + return Instruction::AddrSpaceCast; + return 0; + case 15: // FIXME: this state can be merged with (1), but the following assert // is useful to check the correcteness of the sequence due to semantic diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 356803ad7ca..1b331d118f6 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1434,7 +1434,12 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! - if (GEP->hasAllZeroIndices()) { + if (GEP->hasAllZeroIndices() && + // If CI is an addrspacecast and GEP changes the pointer type, merging + // GEP into CI would undo canonicalizing addrspacecast with different + // pointer types, causing infinite loops. + (!isa<AddrSpaceCastInst>(CI) || + GEP->getType() == GEP->getPointerOperand()->getType())) { // Changing the cast operand is usually not a good idea but it is safe // here because the pointer operand is being replaced with another // pointer operand so the opcode doesn't need to change. @@ -1904,5 +1909,22 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { + // If the destination pointer element type is not the same as the source's, + // do the bitcast to the destination element type in the source address space + // first, then the addrspacecast. This exposes the cast to other transforms. 
+ Value *Src = CI.getOperand(0); + PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType()); + PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType()); + + Type *DestElemTy = DestTy->getElementType(); + if (SrcTy->getElementType() != DestElemTy) { + Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace()); + if (CI.getType()->isVectorTy()) // Handle vectors of pointers. + MidTy = VectorType::get(MidTy, CI.getType()->getVectorNumElements()); + + Value *NewBitCast = Builder->CreateBitCast(Src, MidTy); + return new AddrSpaceCastInst(NewBitCast, CI.getType()); + } + return commonPointerCastTransforms(CI); } diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll index d908b556e19..c1684361f99 100644 --- a/test/Transforms/InstCombine/addrspacecast.ll +++ b/test/Transforms/InstCombine/addrspacecast.ll @@ -28,13 +28,91 @@ define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)* define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind { ; CHECK-LABEL: @combine_redundant_addrspacecast_types( -; CHECK: addrspacecast i32 addrspace(1)* %x to float* +; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)* +; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float* ; CHECK-NEXT: ret %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)* %z = addrspacecast i32 addrspace(3)* %y to float* ret float* %z } +define <4 x float*> @combine_redundant_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind { +; CHECK-LABEL: @combine_redundant_addrspacecast_types_vector( +; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*> +; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float*> +; CHECK-NEXT: ret + %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*> + %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x float*> + ret <4 x float*> %z +} + +define float addrspace(2)* 
@combine_addrspacecast_bitcast_1(i32 addrspace(1)* %x) nounwind { +; CHECK-LABEL: @combine_addrspacecast_bitcast_1( +; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)* +; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)* +; CHECK-NEXT: ret + %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)* + %z = bitcast i32 addrspace(2)* %y to float addrspace(2)* + ret float addrspace(2)* %z +} + +define i32 addrspace(2)* @combine_addrspacecast_bitcast_2(i32 addrspace(1)* %x) nounwind { +; CHECK-LABEL: @combine_addrspacecast_bitcast_2( +; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)* +; CHECK-NEXT: ret + %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)* + %z = bitcast float addrspace(2)* %y to i32 addrspace(2)* + ret i32 addrspace(2)* %z +} + +define i32 addrspace(2)* @combine_bitcast_addrspacecast_1(i32 addrspace(1)* %x) nounwind { +; CHECK-LABEL: @combine_bitcast_addrspacecast_1( +; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)* +; CHECK-NEXT: ret + %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)* + %z = addrspacecast i8 addrspace(1)* %y to i32 addrspace(2)* + ret i32 addrspace(2)* %z +} + +define float addrspace(2)* @combine_bitcast_addrspacecast_2(i32 addrspace(1)* %x) nounwind { +; CHECK-LABEL: @combine_bitcast_addrspacecast_2( +; CHECK: bitcast i32 addrspace(1)* %x to float addrspace(1)* +; CHECK: addrspacecast float addrspace(1)* %1 to float addrspace(2)* +; CHECK-NEXT: ret + %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)* + %z = addrspacecast i8 addrspace(1)* %y to float addrspace(2)* + ret float addrspace(2)* %z +} + +define float addrspace(2)* @combine_addrspacecast_types(i32 addrspace(1)* %x) nounwind { +; CHECK-LABEL: @combine_addrspacecast_types( +; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)* +; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)* +; CHECK-NEXT: ret + %y = addrspacecast i32 addrspace(1)* %x to float 
addrspace(2)* + ret float addrspace(2)* %y +} + +define <4 x float addrspace(2)*> @combine_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind { +; CHECK-LABEL: @combine_addrspacecast_types_vector( +; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*> +; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float addrspace(2)*> +; CHECK-NEXT: ret + %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(2)*> + ret <4 x float addrspace(2)*> %y +} + +define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) { +; CHECK-LABEL: @canonicalize_addrspacecast( +; CHECK-NEXT: getelementptr inbounds [16 x i32] addrspace(1)* %arr, i32 0, i32 0 +; CHECK-NEXT: addrspacecast i32 addrspace(1)* %{{[a-zA-Z0-9]+}} to i32* +; CHECK-NEXT: load i32* +; CHECK-NEXT: ret i32 + %p = addrspacecast [16 x i32] addrspace(1)* %arr to i32* + %v = load i32* %p + ret i32 %v +} + @const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll index ef0cb29fd88..3240c6d2a4d 100644 --- a/test/Transforms/InstCombine/getelementptr.ll +++ b/test/Transforms/InstCombine/getelementptr.ll @@ -732,7 +732,8 @@ define i64 @test_gep_bitcast_array_same_size_element([100 x double]* %arr, i64 % define i64 @test_gep_bitcast_array_same_size_element_addrspacecast([100 x double]* %arr, i64 %N) { ; CHECK-LABEL: @test_gep_bitcast_array_same_size_element_addrspacecast( ; CHECK: getelementptr [100 x double]* %arr, i64 0, i64 %V -; CHECK-NEXT: %t = addrspacecast double* +; CHECK-NEXT: bitcast double* +; CHECK-NEXT: %t = addrspacecast i64* ; CHECK: load i64 addrspace(3)* %t %cast = addrspacecast [100 x double]* %arr to i64 addrspace(3)* %V = mul 
i64 %N, 8 @@ -802,10 +803,22 @@ define i16 @test41([3 x i32] addrspace(1)* %array) { ; CHECK-NEXT: ret i16 8 } -define i32 addrspace(1)* @ascast_0_gep([128 x i32]* %p) nounwind { +define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind { ; CHECK-LABEL: @ascast_0_gep( ; CHECK-NOT: getelementptr ; CHECK: ret + %gep = getelementptr i32* %p, i32 0 + %x = addrspacecast i32* %gep to i32 addrspace(1)* + ret i32 addrspace(1)* %x +} + +; Do not merge the GEP and the addrspacecast, because it would undo the +; addrspacecast canonicalization. +define i32 addrspace(1)* @ascast_0_0_gep([128 x i32]* %p) nounwind { +; CHECK-LABEL: @ascast_0_0_gep( +; CHECK-NEXT: getelementptr [128 x i32] +; CHECK-NEXT: addrspacecast i32* +; CHECK-NEXT: ret i32 addrspace(1)* %gep = getelementptr [128 x i32]* %p, i32 0, i32 0 %x = addrspacecast i32* %gep to i32 addrspace(1)* ret i32 addrspace(1)* %x diff --git a/test/Transforms/InstCombine/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll index b5a0ab83bc1..3bc1d36f486 100644 --- a/test/Transforms/InstCombine/memcpy-from-global.ll +++ b/test/Transforms/InstCombine/memcpy-from-global.ll @@ -78,7 +78,8 @@ define void @test2_addrspacecast() { ; %A alloca is deleted ; This doesn't exactly match what test2 does, because folding the type ; cast into the alloca doesn't work for the addrspacecast yet. -; CHECK-NEXT: alloca %T +; CHECK-NEXT: alloca [124 x i8] +; CHECK-NEXT: getelementptr ; CHECK-NEXT: addrspacecast ; use @G instead of %A -- 2.34.1