From: Tyler Nowicki Date: Mon, 10 Aug 2015 19:51:46 +0000 (+0000) Subject: Late evaluation of the fast-math vectorization requirement. X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=9224227bc2310ddda5e4bf7bc8aebde48fae7f4d;p=oota-llvm.git Late evaluation of the fast-math vectorization requirement. This patch moves the verification of fast-math to just before vectorization is done. This way we can tell clang to append the command line options that would allow floating-point commutativity. Specifically, those are enabling fast-math or specifying a loop hint. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@244489 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index f38313f82ea..40dfa6c6a49 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -56,6 +56,7 @@ enum DiagnosticKind { DK_OptimizationRemark, DK_OptimizationRemarkMissed, DK_OptimizationRemarkAnalysis, + DK_OptimizationRemarkAnalysisFPCommute, DK_OptimizationFailure, DK_MIRParser, DK_FirstPluginKind @@ -386,6 +387,42 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. bool isEnabled() const override; + +protected: + DiagnosticInfoOptimizationRemarkAnalysis(enum DiagnosticKind Kind, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc, + Msg) {} +}; + +/// Diagnostic information for optimization analysis remarks related to +/// floating-point non-commutativity. +class DiagnosticInfoOptimizationRemarkAnalysisFPCommute + : public DiagnosticInfoOptimizationRemarkAnalysis { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass-analysis=, then + /// the diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. 
\p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic will + /// include the source code location. \p Msg is the message to show. The + /// front-end will append its own message related to options that address + /// floating-point non-commutativity. Note that this class does not copy this + /// message, so this reference must be valid for the whole life time of the + /// diagnostic. + DiagnosticInfoOptimizationRemarkAnalysisFPCommute(const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationRemarkAnalysis( + DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, DLoc, Msg) {} + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemarkAnalysisFPCommute; + } }; /// Diagnostic information for machine IR parser. @@ -438,6 +475,18 @@ void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, const char *PassName, const Function &Fn, const DebugLoc &DLoc, const Twine &Msg); +/// Emit an optimization analysis remark related to messages about +/// floating-point non-commutativity. \p PassName is the name of the pass +/// emitting the message. If -Rpass-analysis= is given and \p PassName matches +/// the regular expression in -Rpass, then the remark will be emitted. \p Fn is +/// the function triggering the remark, \p DLoc is the debug location where the +/// diagnostic is generated. \p Msg is the message string to use. +void emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg); + /// Diagnostic information for optimization failures. 
class DiagnosticInfoOptimizationFailure : public DiagnosticInfoOptimizationBase { diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 0137e4e27f7..a890a57a28f 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -85,24 +85,32 @@ public: RecurrenceDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoRecurrence), - MinMaxKind(MRK_Invalid) {} + MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr) {} RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, - MinMaxRecurrenceKind MK) - : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} + MinMaxRecurrenceKind MK, + Instruction *UAI /*Unsafe Algebra Inst*/) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), + UnsafeAlgebraInst(UAI) {} /// This POD struct holds information about a potential recurrence operation. class InstDesc { public: - InstDesc(bool IsRecur, Instruction *I) - : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid) {} + InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) + : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), + UnsafeAlgebraInst(UAI) {} - InstDesc(Instruction *I, MinMaxRecurrenceKind K) - : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K) {} + InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) + : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), + UnsafeAlgebraInst(UAI) {} bool isRecurrence() { return IsRecurrence; } + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } Instruction *getPatternInst() { return PatternLastInst; } @@ -115,6 +123,8 @@ public: Instruction *PatternLastInst; // If this is a min/max pattern the comparison predicate. 
MinMaxRecurrenceKind MinMaxKind; + // Recurrence has unsafe algebra. + Instruction *UnsafeAlgebraInst; }; /// Returns a struct describing if the instruction 'I' can be a recurrence @@ -167,6 +177,13 @@ public: Instruction *getLoopExitInstr() { return LoopExitInstr; } + /// Returns true if the recurrence has unsafe algebra which requires a relaxed + /// floating-point model. + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + /// Returns first unsafe algebra instruction in the PHI node's use-chain. + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + private: // The starting value of the recurrence. // It does not have to be zero! @@ -177,6 +194,8 @@ private: RecurrenceKind Kind; // If this a min/max recurrence the kind of recurrence. MinMaxRecurrenceKind MinMaxKind; + // First occurrence of unsafe algebra in the PHI's use-chain. + Instruction *UnsafeAlgebraInst; }; BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P); diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 6612681d689..07c6714e913 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -196,6 +196,15 @@ void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx, DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg)); } +void llvm::emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute(PassName, Fn, + DLoc, Msg)); +} + bool DiagnosticInfoOptimizationFailure::isEnabled() const { // Only print warnings. 
return getSeverity() == DS_Warning; diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 59680f5eef5..803d24bcb76 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -199,6 +199,11 @@ static bool isDiagnosticEnabled(const DiagnosticInfo &DI) { if (!cast(DI).isEnabled()) return false; break; + case llvm::DK_OptimizationRemarkAnalysisFPCommute: + if (!cast(DI) + .isEnabled()) + return false; + break; default: break; } diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 5cbde94a98e..dae19d23db6 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -201,7 +201,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, // Save the description of this reduction variable. RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, - ReduxDesc.getMinMaxKind()); + ReduxDesc.getMinMaxKind(), + ReduxDesc.getUnsafeAlgebraInst()); RedDes = RD; @@ -263,7 +264,10 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr) { bool FP = I->getType()->isFloatingPointTy(); - bool FastMath = FP && I->hasUnsafeAlgebra(); + Instruction *UAI = Prev.getUnsafeAlgebraInst(); + if (!UAI && FP && !I->hasUnsafeAlgebra()) + UAI = I; // Found an unsafe (unvectorizable) algebra instruction. 
+ switch (I->getOpcode()) { default: return InstDesc(false, I); @@ -284,10 +288,10 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, case Instruction::Xor: return InstDesc(Kind == RK_IntegerXor, I); case Instruction::FMul: - return InstDesc(Kind == RK_FloatMult && FastMath, I); + return InstDesc(Kind == RK_FloatMult, I, UAI); case Instruction::FSub: case Instruction::FAdd: - return InstDesc(Kind == RK_FloatAdd && FastMath, I); + return InstDesc(Kind == RK_FloatAdd, I, UAI); case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 47e7436cf6d..9cfd5feca2d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -220,6 +220,7 @@ namespace { class LoopVectorizationLegality; class LoopVectorizationCostModel; class LoopVectorizeHints; +class LoopVectorizationRequirements; /// \brief This modifies LoopAccessReport to initialize message with /// loop-vectorizer-specific part. @@ -796,10 +797,12 @@ public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, const TargetTransformInfo *TTI, - LoopAccessAnalysis *LAA) + LoopAccessAnalysis *LAA, + LoopVectorizationRequirements *R) : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT), - Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), + Requirements(R) {} /// This enum represents the kinds of inductions that we support. enum InductionKind { @@ -1065,6 +1068,9 @@ private: /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr; + /// Vectorization requirements that will go through late-evaluation. 
+ LoopVectorizationRequirements *Requirements; + ValueToValueMap Strides; SmallPtrSet StrideSet; @@ -1415,6 +1421,47 @@ static void emitMissedWarning(Function *F, Loop *L, } } +/// \brief This holds vectorization requirements that must be verified late in +/// the process. The requirements are set by legalize and costmodel. Once +/// vectorization has been determined to be possible and profitable the +/// requirements can be verified by looking for metadata or compiler options. +/// For example, some loops require FP commutativity which is only allowed if +/// vectorization is explicitly specified or if the fast-math compiler option +/// has been provided. +/// Late evaluation of these requirements allows helpful diagnostics to be +/// composed that tell the user what needs to be done to vectorize the loop. For +/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late +/// evaluation should be used only when diagnostics can be generated that can be +/// followed by a non-expert user. +class LoopVectorizationRequirements { +public: + LoopVectorizationRequirements() : UnsafeAlgebraInst(nullptr) {} + + void addUnsafeAlgebraInst(Instruction *I) { + // First unsafe algebra instruction. 
+ if (!UnsafeAlgebraInst) + UnsafeAlgebraInst = I; + } + + bool doesNotMeet(Function *F, const LoopVectorizeHints &Hints) { + if (UnsafeAlgebraInst && + Hints.getForce() == LoopVectorizeHints::FK_Undefined && + Hints.getWidth() == 0) { + emitOptimizationRemarkAnalysisFPCommute( + F->getContext(), DEBUG_TYPE, *F, UnsafeAlgebraInst->getDebugLoc(), + VectorizationReport() << "vectorization requires changes in the " + "order of operations, however IEEE 754 " + "floating-point operations are not " + "commutative"); + return true; + } + return false; + } + +private: + Instruction *UnsafeAlgebraInst; +}; + static void addInnerLoop(Loop &L, SmallVectorImpl &V) { if (L.empty()) return V.push_back(&L); @@ -1609,7 +1656,9 @@ struct LoopVectorize : public FunctionPass { } // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA); + LoopVectorizationRequirements Requirements; + LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA, + &Requirements); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1665,6 +1714,13 @@ struct LoopVectorize : public FunctionPass { std::string VecDiagMsg, IntDiagMsg; bool VectorizeLoop = true, InterleaveLoop = true; + if (Requirements.doesNotMeet(F, Hints)) { + DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " + "requirements.\n"); + emitMissedWarning(F, L, Hints); + return false; + } + if (VF.Width == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); VecDiagMsg = @@ -4079,6 +4135,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, Reductions[Phi])) { + if (Reductions[Phi].hasUnsafeAlgebra()) + Requirements->addUnsafeAlgebraInst( + Reductions[Phi].getUnsafeAlgebraInst()); AllowedExit.insert(Reductions[Phi].getLoopExitInstr()); continue; } diff --git a/test/Transforms/LoopVectorize/no_fpmath.ll 
b/test/Transforms/LoopVectorize/no_fpmath.ll new file mode 100644 index 00000000000..709025f4cd8 --- /dev/null +++ b/test/Transforms/LoopVectorize/no_fpmath.ll @@ -0,0 +1,104 @@ +; RUN: opt < %s -loop-vectorize -S -pass-remarks='loop-vectorize' -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s + +; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: vectorization requires changes in the order of operations, however IEEE 754 floating-point operations are not commutative +; CHECK: remark: no_fpmath.c:6:14: loop not vectorized: +; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2) + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.10.0" + +; Function Attrs: nounwind readonly ssp uwtable +define double @cond_sum(i32* nocapture readonly %v, i32 %n) #0 { +entry: + %cmp.7 = icmp sgt i32 %n, 0, !dbg !3 + br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !8 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !9 + +for.cond.cleanup.loopexit: ; preds = %for.body + %add.lcssa = phi double [ %add, %for.body ] + br label %for.cond.cleanup, !dbg !10 + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ] + ret double %a.0.lcssa, !dbg !10 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !9 + %0 = load i32, i32* %arrayidx, align 4, !dbg !9, !tbaa !11 + %cmp1 = icmp eq i32 %0, 0, !dbg !15 + %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !9 + %add = fadd double %a.08, %cond, !dbg !16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, 
!dbg !8 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8 + %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8 + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17 +} + +; Function Attrs: nounwind readonly ssp uwtable +define double @cond_sum_loop_hint(i32* nocapture readonly %v, i32 %n) #0 { +entry: + %cmp.7 = icmp sgt i32 %n, 0, !dbg !19 + br i1 %cmp.7, label %for.body.preheader, label %for.cond.cleanup, !dbg !21 + +for.body.preheader: ; preds = %entry + br label %for.body, !dbg !22 + +for.cond.cleanup.loopexit: ; preds = %for.body + %add.lcssa = phi double [ %add, %for.body ] + br label %for.cond.cleanup, !dbg !23 + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + %a.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add.lcssa, %for.cond.cleanup.loopexit ] + ret double %a.0.lcssa, !dbg !23 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %a.08 = phi double [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %v, i64 %indvars.iv, !dbg !22 + %0 = load i32, i32* %arrayidx, align 4, !dbg !22, !tbaa !11 + %cmp1 = icmp eq i32 %0, 0, !dbg !24 + %cond = select i1 %cmp1, double 3.400000e+00, double 1.150000e+00, !dbg !22 + %add = fadd double %a.08, %cond, !dbg !25 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !21 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21 + %exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !21 + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !21, !llvm.loop !26 +} + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{i32 1, !"PIC Level", i32 2} +!2 = !{!"clang version 3.7.0"} +!3 = !DILocation(line: 5, column: 20, scope: !4) +!4 = !DISubprogram(name: "cond_sum", scope: !5, file: !5, line: 1, type: !6, isLocal: 
false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, function: double (i32*, i32)* @cond_sum, variables: !7) +!5 = !DIFile(filename: "no_fpmath.c", directory: "") +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 5, column: 3, scope: !4) +!9 = !DILocation(line: 6, column: 14, scope: !4) +!10 = !DILocation(line: 9, column: 3, scope: !4) +!11 = !{!12, !12, i64 0} +!12 = !{!"int", !13, i64 0} +!13 = !{!"omnipotent char", !14, i64 0} +!14 = !{!"Simple C/C++ TBAA"} +!15 = !DILocation(line: 6, column: 19, scope: !4) +!16 = !DILocation(line: 6, column: 11, scope: !4) +!17 = distinct !{!17, !18} +!18 = !{!"llvm.loop.unroll.disable"} +!19 = !DILocation(line: 16, column: 20, scope: !20) +!20 = !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, function: double (i32*, i32)* @cond_sum_loop_hint, variables: !7) +!21 = !DILocation(line: 16, column: 3, scope: !20) +!22 = !DILocation(line: 17, column: 14, scope: !20) +!23 = !DILocation(line: 20, column: 3, scope: !20) +!24 = !DILocation(line: 17, column: 19, scope: !20) +!25 = !DILocation(line: 17, column: 11, scope: !20) +!26 = distinct !{!26, !27, !18} +!27 = !{!"llvm.loop.vectorize.enable", i1 true}