From 344731c01805aeda49c747bac6148501fa85557c Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Wed, 20 Apr 2011 21:48:38 +0000 Subject: [PATCH] Fix another case of a ScalarRepl bug that only occurs with code generated by llvm-gcc, since llvm-gcc uses 2 i64s for passing a 4 x float vector on ARM rather than an i64 array like Clang. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129878 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../Scalar/ScalarReplAggregates.cpp | 75 +++++++++++-------- test/Transforms/ScalarRepl/vector_promote.ll | 15 ++++ 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 82bb6455670..8178c270759 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -351,6 +351,9 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, ->getPrimitiveSizeInBits()/8; if (EltSize == CurrentEltSize) return; + + if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) + return; } } @@ -661,23 +664,30 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, } /// getScaledElementType - Gets a scaled element type for a partial vector -/// access of an alloca. The input type must be an integer or float, and -/// the resulting type must be an integer, float or double. -static const Type *getScaledElementType(const Type *OldTy, +/// access of an alloca. The input types must be integer or floating-point +/// scalar or vector types, and the resulting type is an integer, float or +/// double. 
+static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, unsigned NewBitWidth) { - assert((OldTy->isIntegerTy() || OldTy->isFloatTy()) && "Partial vector " - "accesses must be scaled from integer or float elements."); - - LLVMContext &Context = OldTy->getContext(); - - if (OldTy->isIntegerTy()) - return Type::getIntNTy(Context, NewBitWidth); - if (NewBitWidth == 32) - return Type::getFloatTy(Context); - if (NewBitWidth == 64) - return Type::getDoubleTy(Context); + bool IsFP1 = Ty1->isFloatingPointTy() || + (Ty1->isVectorTy() && + cast<VectorType>(Ty1)->getElementType()->isFloatingPointTy()); + bool IsFP2 = Ty2->isFloatingPointTy() || + (Ty2->isVectorTy() && + cast<VectorType>(Ty2)->getElementType()->isFloatingPointTy()); + + LLVMContext &Context = Ty1->getContext(); + + // Prefer floating-point types over integer types, as integer types may have + // been created by earlier scalar replacement. + if (IsFP1 || IsFP2) { + if (NewBitWidth == 32) + return Type::getFloatTy(Context); + if (NewBitWidth == 64) + return Type::getDoubleTy(Context); + } - llvm_unreachable("Invalid type for a partial vector access of an alloca!"); + return Type::getIntNTy(Context, NewBitWidth); } /// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector @@ -744,15 +754,11 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, return CreateShuffleVectorCast(FromVal, ToType, Builder); } - if (ToType->isVectorTy()) { - assert(isPowerOf2_64(AllocaSize / ToTypeSize) && - "Partial vector access of an alloca must have a power-of-2 size " - "ratio."); - assert(Offset == 0 && "Can't extract a value of a smaller vector type " - "from a nonzero offset."); + if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " + "of a smaller vector type at a nonzero offset."); - const Type *ToElementTy = cast<VectorType>(ToType)->getElementType(); - const Type *CastElementTy = getScaledElementType(ToElementTy, + const Type *CastElementTy = 
+ getScaledElementType(FromType, ToType, ToTypeSize * 8); unsigned NumCastVectorElements = AllocaSize / ToTypeSize; @@ -760,8 +766,12 @@ ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, const Type *CastTy = VectorType::get(CastElementTy, NumCastVectorElements); Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( - Type::getInt32Ty(Context), 0), "tmp"); + Type::getInt32Ty(Context), Elt), "tmp"); return Builder.CreateBitCast(Extract, ToType, "tmp"); } @@ -893,13 +903,12 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, return CreateShuffleVectorCast(SV, VTy, Builder); } - if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) { - assert(Offset == 0 && "Can't insert a value of a smaller vector type at " - "a nonzero offset."); + if (isPowerOf2_64(VecSize / ValSize)) { + assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " + "value of a smaller vector type at a nonzero offset."); - const Type *ToElementTy = - cast<VectorType>(SV->getType())->getElementType(); - const Type *CastElementTy = getScaledElementType(ToElementTy, ValSize); + const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), + ValSize); unsigned NumCastVectorElements = VecSize / ValSize; LLVMContext &Context = SV->getContext(); @@ -908,9 +917,13 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); Value *Insert = Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( - Type::getInt32Ty(Context), 0), "tmp"); 
+ Type::getInt32Ty(Context), Elt), "tmp"); return Builder.CreateBitCast(Insert, AllocaType, "tmp"); } diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll index 9c17a54294f..c51ef109c0b 100644 --- a/test/Transforms/ScalarRepl/vector_promote.ll +++ b/test/Transforms/ScalarRepl/vector_promote.ll @@ -248,3 +248,18 @@ entry: ; CHECK: shufflevector <4 x i64> %tmpV2, <4 x i64> undef, <3 x i32> } +define <4 x float> @test16(<4 x float> %x, i64 %y0, i64 %y1) { +entry: + %tmp8 = bitcast <4 x float> undef to <2 x double> + %tmp9 = bitcast i64 %y0 to double + %tmp10 = insertelement <2 x double> %tmp8, double %tmp9, i32 0 + %tmp11 = bitcast <2 x double> %tmp10 to <4 x float> + %tmp3 = bitcast <4 x float> %tmp11 to <2 x double> + %tmp4 = bitcast i64 %y1 to double + %tmp5 = insertelement <2 x double> %tmp3, double %tmp4, i32 1 + %tmp6 = bitcast <2 x double> %tmp5 to <4 x float> + ret <4 x float> %tmp6 +; CHECK: @test16 +; CHECK-NOT: alloca +; CHECK: bitcast <4 x float> %tmp11 to <2 x double> +} -- 2.34.1