From 8574cb5905b62fc2d12aee61b39edcfab1c9f57e Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 12 May 2015 21:28:39 +0000 Subject: [PATCH] Revert "ARM: Remove Itineraries for swift CPU" Reverting until I figure out the new lit failures. This reverts commit r237179. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237189 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMScheduleSwift.td | 1046 ++++++++++++++++++++++++++++ 1 file changed, 1046 insertions(+) diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index 85410ae8478..b03d5ff44c6 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -25,6 +25,1050 @@ def SW_LS : FuncUnit; def SW_IDIV : FuncUnit; def SW_FDIV : FuncUnit; +// FIXME: Need bypasses. +// FIXME: Model the multiple stages of IIC_iMOVix2, IIC_iMOVix2addpc, and +// IIC_iMOVix2ld better. +// FIXME: Model the special immediate shifts that are not microcoded. +// FIXME: Do we need to model the fact that uses of r15 in a micro-op force it +// to issue on pipe 1? +// FIXME: Model the pipelined behavior of CMP / TST instructions. +// FIXME: Better model the microcode stages of multiply instructions, especially +// conditional variants. +// FIXME: Add preload instruction when it is documented. +// FIXME: Model non-pipelined nature of FP div / sqrt unit. + +def SwiftItineraries : ProcessorItineraries< + [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ + // + // Move instructions, unconditional + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_LS]>], + [5]>, + // + // MVN instructions + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // No operand cycles + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>]>, + // + // Binary Instructions that produce a result + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Bitwise Instructions that produce a result + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1]>, + // + // Unary Instructions that produce a result + + // CLZ, RBIT, etc. + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + + // BFC, BFI, UBFX, SBFX + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1]>, + + // + // Zero and sign extension instructions + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1, 1, 1]>, + // + // Compare instructions + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Test instructions + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<2, [SW_ALU0, SW_ALU1]>], + [1, 1, 1]>, + // + // Move instructions, conditional + // FIXME: Correctly model the extra input dep on the destination. + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2]>, + + // Integer multiply pipeline + // + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [3, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 3>, + InstrStage<1, [SW_ALU0]>], + [5, 5, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 6, 1, 1]>, + // + // Integer divide + InstrItinData, + InstrStage<1, [SW_ALU0], 0>, + InstrStage<14, [SW_IDIV]>], + [14, 1, 1]>, + + // Integer load pipeline + // FIXME: The timings are some rough approximations + // + // Immediate offset + InstrItinData, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData, + InstrStage<1, [SW_LS]>], + [3, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Register offset + InstrItinData, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [5, 1, 1]>, + // + // Immediate offset with update + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1]>, + // + // Register offset with update + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0], 1>, + InstrStage<1, [SW_LS]>], + [3, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [3, 4, 1, 1]>, + // + // Scaled register offset with update + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [5, 3, 1, 1]>, + // + // Load multiple, def is the 5th operand. + // FIXME: This assumes 3 to 4 registers. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + + // + // Load multiple + update, defs are the 1st and 5th operands. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Load multiple plus branch + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1, 3], [], -1>, // dynamic uops + // + // Pop, def is the 3rd operand. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + // + // Pop + branch, def is the 3rd operand. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 3], [], -1>, // dynamic uops + + // + // iLoadi + iALUr for t2LDRpci_pic. + InstrItinData, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [4, 1]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Register offset + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Scaled register offset + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Immediate offset with update + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1]>, + // + // Register offset with update + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 1]>, + // + // Scaled register offset with update + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 2>, + InstrStage<1, [SW_LS], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>], + [3, 1, 1, 1]>, + // + // Store multiple + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [], [], -1>, // dynamic uops + // + // Store multiple + update + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS], 1>, + InstrStage<1, [SW_ALU0, SW_ALU1], 1>, + InstrStage<1, [SW_LS]>], + [2], [], -1>, // dynamic uops + + // + // Preload + InstrItinData], [1, 1]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData]>, + + // FP Special Register to Integer Register File Move + InstrItinData, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [1]>, + // + // Single-precision FP Unary + // + // Most floating-point moves get issued on ALU0. + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-precision FP Unary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + + // + // Single-precision FP Compare + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Double-precision FP Compare + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [1, 1]>, + // + // Single to Double FP Convert + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double to Single FP Convert + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 4>, + InstrStage<1, [SW_ALU1]>], + [6, 1]>, + // + // Half to Single FP Convert + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1]>, + // + // Single-precision FP ALU + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision Fused FP MAC + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-precision Fused FP MAC + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [12, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<15, [SW_FDIV]>], + [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData, + InstrStage<1, [SW_ALU1], 0>, + InstrStage<30, [SW_FDIV]>], + [32, 1, 1]>, + + // + // Integer to Single-precision Move + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Single-precision to Integer Move + InstrItinData, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Single-precision FP Load + InstrItinData, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // Double-precision FP Load + InstrItinData, + InstrStage<1, [SW_LS]>], + [4, 1]>, + // + // FP Load Multiple + // FIXME: Assumes a single Q register. + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1, 1, 4], [], -1>, // dynamic uops + // + // FP Load Multiple + update + // FIXME: Assumes a single Q register. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1, 4], [], -1>, // dynamic uops + // + // Single-precision FP Store + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // Double-precision FP Store + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1]>, + // + // FP Store Multiple + // FIXME: Assumes a single Q register. + InstrItinData, + InstrStage<1, [SW_LS]>], + [1, 1, 1], [], -1>, // dynamic uops + // + // FP Store Multiple + update + // FIXME: Assumes a single Q register. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0, SW_ALU1]>], + [2, 1, 1, 1], [], -1>, // dynamic uops + // NEON + // + // Double-register Integer Unary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer Unary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Q-Unary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1]>, + // + // Double-register Integer Binary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Binary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Subtract + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Subtract + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Count + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Quad-register Integer Count + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1, 1]>, + + // + // Move + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Move Immediate + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2]>, + // + // Double-register Permute Move + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Quad-register Permute Move + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Integer to Single-precision Move + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + // + // Integer to Double-precision Move + InstrItinData, + InstrStage<1, [SW_LS]>], + [4, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData, + InstrStage<1, [SW_LS]>], + [3, 1]>, + // + // Double-precision to Integer Move + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 3>, + InstrStage<1, [SW_LS]>], + [3, 4, 1]>, + // + // Integer to Lane Move + // FIXME: I think this is correct, but it is not clear from the tuning guide. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_LS], 4>, + InstrStage<1, [SW_ALU0]>], + [6, 1]>, + + // + // Vector narrow move + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [2, 1]>, + // + // Double-register FP Unary + // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, + // and they issue on a different pipeline. + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Quad-register FP Unary + // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, + // and they issue on a different pipeline. + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [2, 1]>, + // + // Double-register FP Binary + // FIXME: We're using this itin for many instructions. + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + + // + // VPADD, etc. + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Double-register FP VMUL + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Quad-register FP Binary + InstrItinData, + InstrStage<1, [SW_ALU0]>], + [4, 1, 1]>, + // + // Quad-register FP VMUL + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [4, 1, 1]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register FP Multiple-Accumulate + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Fused FP Multiple-Accumulate + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register FusedF P Multiple-Accumulate + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Reciprical Step + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Quad-register Reciprical Step + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [8, 1, 1]>, + // + // Double-register Permute + // FIXME: The latencies are unclear from the documentation. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + // + // Quad-register Permute + // FIXME: The latencies are unclear from the documentation. + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + // + // Quad-register Permute (3 cycle issue on A9) + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [3, 4, 3, 4]>, + + // + // Double-register VEXT + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + // + // VTB + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 3, 3]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 3, 5, 5]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 3, 5, 7, 7]>, + // + // VTBX + InstrItinData, + InstrStage<1, [SW_ALU1]>], + [2, 1, 1]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [4, 1, 3, 3]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [6, 1, 3, 5, 5]>, + InstrItinData, + InstrStage<1, [SW_DIS1], 0>, + InstrStage<1, [SW_DIS2], 0>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1], 2>, + InstrStage<1, [SW_ALU1]>], + [8, 1, 3, 5, 7, 7]> +]>; + // ===---------------------------------------------------------------------===// // This following definitions describe the simple machine model which // will replace itineraries. @@ -35,6 +1079,8 @@ def SwiftModel : SchedMachineModel { let MicroOpBufferSize = 45; // Based on NEON renamed registers. let LoadLatency = 3; let MispredictPenalty = 14; // A branch direction mispredict. + + let Itineraries = SwiftItineraries; } // Swift predicates. -- 2.34.1