From 1eb67a4f84d49d88454e2b6142d30e81c839209f Mon Sep 17 00:00:00 2001
From: Chad Rosier <mcrosier@codeaurora.org>
Date: Fri, 21 Mar 2014 19:34:41 +0000
Subject: [PATCH] [AArch64] Add SchedRW lists to NEON instructions.

Previously, only regular AArch64 instructions were annotated with SchedRW lists.
This patch does the same for NEON, enabling these instructions to be scheduled
by the MIScheduler. Additionally, store operations are now modeled, and a few
SchedRW lists were updated to fix bugs (e.g. multiple def operands).

Reviewers: apazos, mcrosier, atrick
Patch by Dave Estes <cestes@codeaurora.org>!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@204505 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64InstrInfo.td    | 129 +++--
 lib/Target/AArch64/AArch64InstrNEON.td    | 603 +++++++++++++++-------
 lib/Target/AArch64/AArch64Schedule.td     |   8 +
 lib/Target/AArch64/AArch64ScheduleA53.td  |  18 +-
 test/CodeGen/AArch64/misched-basic-A53.ll |  31 +-
 5 files changed, 542 insertions(+), 247 deletions(-)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 3dc66a1f238..7d7a641a2e3 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -463,7 +463,7 @@ defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
                          (outs GPR32:$Rd)>;
 
 
-let Rd = 0b11111, isCompare = 1 in {
+let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in {
 defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
                         (outs), extends_to_i64>,
             addsub_xxtx<     0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
@@ -689,7 +689,7 @@ multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
                             [(set NZCV,
                                   (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
                             NoItinerary>,
-           Sched<[WriteALU, ReadALU]> {
+           Sched<[WriteCMP, ReadCMP]> {
     let Rd = 0b11111;
     let Defs = [NZCV];
     let isCompare = 1;
@@ -1086,7 +1086,7 @@ def BFMwwii :
   A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
         (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
         "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-  Sched<[WriteALU, ReadALU]> {
+  Sched<[WriteALU, ReadALU, ReadALU]> {
   let DecoderMethod = "DecodeBitfieldInstruction";
   let Constraints = "$src = $Rd";
 }
@@ -1095,7 +1095,7 @@ def BFMxxii :
   A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
         (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
         "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-  Sched<[WriteALU, ReadALU]> {
+  Sched<[WriteALU, ReadALU, ReadALU]> {
   let DecoderMethod = "DecodeBitfieldInstruction";
   let Constraints = "$src = $Rd";
 }
@@ -1295,7 +1295,7 @@ defm UBFX :  A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
 def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
                           (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
                           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-                Sched<[WriteALU, ReadALU]> {
+                Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1304,7 +1304,7 @@ def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
 def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
                           (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
                           "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-                Sched<[WriteALU, ReadALU]> {
+                Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1407,7 +1407,7 @@ defm UBFIZ :  A64I_bitfield_insert<0b10, "ubfiz">;
 def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
                 (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
                 "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-              Sched<[WriteALU, ReadALU]> {
+              Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1416,7 +1416,7 @@ def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
 def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
                 (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
                 "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-              Sched<[WriteALU, ReadALU]> {
+              Sched<[WriteALU, ReadALU, ReadALU]> {
   // As above, no disassembler allowed.
   let isAsmParserOnly = 1;
   let Constraints = "$src = $Rd";
@@ -1560,7 +1560,8 @@ class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
                     (outs),
                     (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
                     !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
-                    [], NoItinerary> {
+                    [], NoItinerary>,
+    Sched<[WriteCMP, ReadCMP, ReadCMP]> {
   let Defs = [NZCV];
 }
 
@@ -1608,7 +1609,7 @@ multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
                             [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
                             NoItinerary>,
-               Sched<[WriteCMP, ReadCMP]>;
+               Sched<[WriteCMP, ReadCMP, ReadCMP]>;
 
 
     def xxxc : A64I_condsel<0b1, op, 0b0, op2,
@@ -1617,7 +1618,7 @@ multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
                             !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
                             [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
                             NoItinerary>,
-               Sched<[WriteCMP, ReadCMP]>;
+               Sched<[WriteCMP, ReadCMP, ReadCMP]>;
   }
 }
 
@@ -1797,7 +1798,8 @@ multiclass dp_2src_crc<bit c, string asmop> {
   def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
                            !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
                            (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
-                           NoItinerary>;
+                           NoItinerary>,
+                  Sched<[WriteALU, ReadALU, ReadALU]>;
 }
 
 multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
@@ -2630,7 +2632,7 @@ let mayLoad = 1 in {
                              (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
                              "prfm\t$Rt, $Imm19",
                              [], NoItinerary>,
-                 Sched<[WriteLd]>;
+                 Sched<[WriteLd, ReadLd]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -2685,19 +2687,23 @@ class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
 multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
   def _byte:  A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
                               (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>;
+                              [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _hword:  A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
                                (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                               [],NoItinerary>;
+                               [],NoItinerary>,
+               Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _word:  A64I_SRexs_impl<0b10, opcode, asmstr,
                               (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>;
+                              [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
                               (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>;
+                              [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 }
 
 defm STXR  : A64I_SRex<"stxr",  0b000, "STXR">;
@@ -2792,22 +2798,26 @@ multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
   def _byte:  A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
                             (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
                             [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
-                            NoItinerary>;
+                            NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _hword:  A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
                            (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
                            [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
-                           NoItinerary>;
+                           NoItinerary>,
+               Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _word:  A64I_SLexs_impl<0b10, opcode, asmstr,
                            (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
                            [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
-                           NoItinerary>;
+                           NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 
   def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
                            (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
                            [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
-                           NoItinerary>;
+                           NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]>;
 }
 
 defm STLR  : A64I_SLex<"stlr", 0b101, "STLR">;
@@ -2832,12 +2842,14 @@ multiclass A64I_SPex<string asmstr, bits<3> opcode> {
   def _word:  A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
                             (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
                                  GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
 
   def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
                             (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
                                             GPR64xsp0:$Rn),
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
 }
 
 defm STXP  : A64I_SPex<"stxp", 0b010>;
@@ -2865,13 +2877,13 @@ multiclass A64I_LPex<string asmstr, bits<3> opcode> {
                             (outs GPR32:$Rt, GPR32:$Rt2),
                             (ins GPR64xsp0:$Rn),
                             [], NoItinerary>,
-              Sched<[WriteLd]>;
+              Sched<[WriteLd, WriteLd, ReadLd]>;
 
   def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
                             (outs GPR64:$Rt, GPR64:$Rt2),
                             (ins GPR64xsp0:$Rn),
                             [], NoItinerary>,
-              Sched<[WriteLd]>;
+              Sched<[WriteLd, WriteLd, ReadLd]>;
 }
 
 defm LDXP  : A64I_LPex<"ldxp", 0b010>;
@@ -3085,7 +3097,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
   def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
                      (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
                      "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
-                     [], NoItinerary> {
+                     [], NoItinerary>,
+             Sched<[WriteSt, ReadSt, ReadSt]> {
     let mayStore = 1;
   }
   def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
@@ -3126,13 +3139,15 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                   (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
                                                params.regextWm:$Ext),
                                   "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
-                                  [], NoItinerary>;
+                                  [], NoItinerary>,
+                            Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
 
     def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
                                   (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
                                                params.regextXm:$Ext),
                                   "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
-                                  [], NoItinerary>;
+                                  [], NoItinerary>,
+                            Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
   }
   def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
       (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
@@ -3142,7 +3157,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
   def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
                              (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                              "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
-                             [], NoItinerary> {
+                             [], NoItinerary>,
+              Sched<[WriteSt, ReadSt, ReadSt]> {
     let mayStore = 1;
   }
   def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
@@ -3163,7 +3179,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                (outs GPR64xsp:$Rn_wb),
                                (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                                "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
-                               [], NoItinerary> {
+                               [], NoItinerary>,
+                     Sched<[WriteSt, ReadSt, ReadSt]> {
     let Constraints = "$Rn = $Rn_wb";
     let mayStore = 1;
 
@@ -3176,7 +3193,7 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                                     "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
                                     [], NoItinerary>,
-                     Sched<[WriteLd, ReadLd]> {
+                     Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3187,7 +3204,8 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                (outs GPR64xsp:$Rn_wb),
                                (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
                                "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
-                               [], NoItinerary> {
+                               [], NoItinerary>,
+                    Sched<[WriteSt, ReadSt, ReadSt]> {
     let Constraints = "$Rn = $Rn_wb";
     let mayStore = 1;
 
@@ -3200,7 +3218,7 @@ multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
                                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                                     "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
                                     [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3340,7 +3358,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
                                  [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3350,7 +3368,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                    (ins GPR64xsp:$Rn, simm9:$SImm9),
                                    "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
                                    [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3361,7 +3379,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
                                  [], NoItinerary>,
-                   Sched<[WriteLd, ReadLd]> {
+                   Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3371,7 +3389,7 @@ multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
                                  [], NoItinerary>,
-                   Sched<[WriteLd, ReadLd]> {
+                   Sched<[WriteLd, WriteLd, ReadLd]> {
       let Constraints = "$Rn = $Rn_wb";
       let DecoderMethod = "DecodeSingleIndexedInstruction";
     }
@@ -3431,7 +3449,7 @@ def LDRSWx_PostInd
                     (ins GPR64xsp:$Rn, simm9:$SImm9),
                     "ldrsw\t$Rt, [$Rn], $SImm9",
                     [], NoItinerary>,
-      Sched<[WriteLd, ReadLd]> {
+      Sched<[WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3442,7 +3460,7 @@ def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
                                  (ins GPR64xsp:$Rn, simm9:$SImm9),
                                  "ldrsw\t$Rt, [$Rn, $SImm9]!",
                                  [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeSingleIndexedInstruction";
@@ -3652,7 +3670,7 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                             (outs SomeReg:$Rt, SomeReg:$Rt2),
                             (ins GPR64xsp:$Rn, simm7:$SImm7),
                             "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
-             Sched<[WriteLd, ReadLd]> {
+             Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3666,7 +3684,8 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                                     GPR64xsp:$Rn,
                                     simm7:$SImm7),
                                "stp\t$Rt, $Rt2, [$Rn], $SImm7",
-                               [], NoItinerary> {
+                               [], NoItinerary>,
+                     Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
     let mayStore = 1;
     let Constraints = "$Rn = $Rn_wb";
 
@@ -3679,16 +3698,17 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                         (ins GPR64xsp:$Rn, simm7:$SImm7),
                         "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
                         [], NoItinerary>,
-                     Sched<[WriteLd, ReadLd]> {
+                     Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
 
   def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
-                    (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
-                    "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
-                    [], NoItinerary> {
+                       (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+                       "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
+                       [], NoItinerary>,
+                    Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
     let mayStore = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -3699,15 +3719,16 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                               (ins GPR64xsp:$Rn, simm7:$SImm7),
                               "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
                               [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let Constraints = "$Rn = $Rn_wb";
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
 
   def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
-                    (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
-                    "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary> {
+                       (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
+                       "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
+                     Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
     let mayStore = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3719,7 +3740,7 @@ multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
                             (outs SomeReg:$Rt, SomeReg:$Rt2),
                             (ins GPR64xsp:$Rn, simm7:$SImm7),
                             "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
-                     Sched<[WriteLd, ReadLd]> {
+                     Sched<[WriteLd, WriteLd, ReadLd]> {
     let mayLoad = 1;
     let DecoderMethod = "DecodeLDSTPairInstruction";
   }
@@ -3745,7 +3766,7 @@ def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
                            (outs GPR64:$Rt, GPR64:$Rt2),
                            (ins GPR64xsp:$Rn, word_simm7:$SImm7),
                            "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
-             Sched<[WriteLd, ReadLd]> {
+             Sched<[WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let DecoderMethod = "DecodeLDSTPairInstruction";
 }
@@ -3756,7 +3777,8 @@ def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
                                   (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
                                   (ins GPR64xsp:$Rn, word_simm7:$SImm7),
                                   "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
-                                  [], NoItinerary> {
+                                  [], NoItinerary>,
+                     Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -3767,7 +3789,7 @@ def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
                                    (ins GPR64xsp:$Rn, word_simm7:$SImm7),
                                    "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
                                    [], NoItinerary>,
-                    Sched<[WriteLd, ReadLd]> {
+                    Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
   let mayLoad = 1;
   let Constraints = "$Rn = $Rn_wb";
   let DecoderMethod = "DecodeLDSTPairInstruction";
@@ -4150,7 +4172,8 @@ let isMoveImm = 1, isReMaterializable = 1,
                              (ins movz64_imm:$FullImm)>;
 }
 
-let Constraints = "$src = $Rd" in
+let Constraints = "$src = $Rd",
+    SchedRW = [WriteALU, ReadALU] in
 defm MOVK : A64I_movwSizes<0b11, "movk",
                            (ins GPR32:$src, movk32_imm:$FullImm),
                            (ins GPR64:$src, movk64_imm:$FullImm)>;
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index 3b919b388b2..0b97e3bdf5a 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -122,14 +122,16 @@ multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode,
                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                [(set (v8i8 VPR64:$Rd),
                   (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _16B : NeonI_3VSame<0b1, u, size, opcode,
                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                [(set (v16i8 VPR128:$Rd),
                   (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 
 }
@@ -143,28 +145,32 @@ multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
               asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
               [(set (v4i16 VPR64:$Rd),
                  (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
               [(set (v8i16 VPR128:$Rd),
                  (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
               [(set (v2i32 VPR64:$Rd),
                  (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
               [(set (v4i32 VPR128:$Rd),
                  (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
@@ -177,14 +183,16 @@ multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
                asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
                [(set (v8i8 VPR64:$Rd),
                   (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
                (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
                asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
                [(set (v16i8 VPR128:$Rd),
                   (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -198,7 +206,8 @@ multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
               [(set (v2i64 VPR128:$Rd),
                  (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -214,21 +223,24 @@ multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
               asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
               [(set (ResTy2S VPR64:$Rd),
                  (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
               [(set (ResTy4S VPR128:$Rd),
                  (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
-              NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
               asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
               [(set (ResTy2D VPR128:$Rd),
                  (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
-               NoItinerary>;
+              NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -286,9 +298,11 @@ def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)),
 
 // Vector Multiply (Integer and Floating-Point)
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
 defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul,
                                      v2f32, v4f32, v2f64, 1>;
+}
 
 // Patterns to match mul of v1i8/v1i16/v1i32 types
 def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)),
@@ -309,8 +323,10 @@ def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)),
 
 // Vector Multiply (Polynomial)
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
                                     int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
+}
 
 // Vector Multiply-accumulate and Multiply-subtract (Integer)
 
@@ -324,7 +340,8 @@ class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
     asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
     [(set (OpTy VPRC:$Rd),
        (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
-    NoItinerary> {
+    NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -335,6 +352,7 @@ def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                        (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
 
 
+let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in {
 def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8,
                                              0b0, 0b0, 0b00, 0b10010, Neon_mla>;
 def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
@@ -360,6 +378,7 @@ def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32,
                                              0b0, 0b1, 0b10, 0b10010, Neon_mls>;
 def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32,
                                              0b1, 0b1, 0b10, 0b10010, Neon_mls>;
+}
 
 // Vector Multiply-accumulate and Multiply-subtract (Floating Point)
 
@@ -369,7 +388,8 @@ def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
 def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
                         (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
 
-let Predicates = [HasNEON, UseFusedMAC] in {
+let Predicates = [HasNEON, UseFusedMAC],
+    SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in {
 def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32,
                                              0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
 def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32,
@@ -403,8 +423,10 @@ def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
 
 // Vector Divide (Floating-Point)
 
+let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in {
 defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv,
                                      v2f32, v4f32, v2f64, 0>;
+}
 
 // Vector Bitwise Operations
 
@@ -770,49 +792,56 @@ multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
              asmop # "\t$Rd.8b, $Rn.8b, $Imm",
              [(set (v8i8 VPR64:$Rd),
                 (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
-             NoItinerary>;
+             NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
              (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
              asmop # "\t$Rd.16b, $Rn.16b, $Imm",
              [(set (v16i8 VPR128:$Rd),
                 (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-             NoItinerary>;
+             NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.4h, $Rn.4h, $Imm",
             [(set (v4i16 VPR64:$Rd),
                (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.8h, $Rn.8h, $Imm",
             [(set (v8i16 VPR128:$Rd),
                (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.2s, $Rn.2s, $Imm",
             [(set (v2i32 VPR64:$Rd),
                (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.4s, $Rn.4s, $Imm",
             [(set (v4i32 VPR128:$Rd),
                (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
             asmop # "\t$Rd.2d, $Rn.2d, $Imm",
             [(set (v2i64 VPR128:$Rd),
                (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 // Vector Compare Mask Equal to Zero (Integer)
@@ -879,21 +908,24 @@ multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
             asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
             [(set (v2i32 VPR64:$Rd),
                (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
             asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
             [(set (v4i32 VPR128:$Rd),
                (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
             (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
             asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
             [(set (v2i64 VPR128:$Rd),
                (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
-            NoItinerary>;
+            NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 // Vector Compare Mask Equal to Zero (Floating Point)
@@ -1051,6 +1083,7 @@ defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
                                        int_arm_neon_vpadd,
                                        v2f32, v4f32, v2f64, 1>;
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 // Vector Saturating Doubling Multiply High
 defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
                     int_arm_neon_vqdmulh, 1>;
@@ -1063,6 +1096,7 @@ defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
 defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
                                       int_aarch64_neon_vmulx,
                                       v2f32, v4f32, v2f64, 1>;
+}
 
 // Patterns to match llvm.aarch64.* intrinsic for 
 // ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output
@@ -1202,7 +1236,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v2i32 VPR64:$Rd),
                                  (v2i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSL_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
        bits<2> Simm;
        let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
      }
@@ -1215,7 +1250,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v4i32 VPR128:$Rd),
                                  (v4i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSL_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
     }
@@ -1229,7 +1265,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v4i16 VPR64:$Rd),
                                  (v4i16 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSLH_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
       bit  Simm;
       let cmode = {0b1, 0b0, Simm, 0b0};
     }
@@ -1242,7 +1279,8 @@ multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
                               [(set (v8i16 VPR128:$Rd),
                                  (v8i16 (opnode (timm:$Imm),
                                    (neon_mov_imm_LSLH_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+               Sched<[WriteFPALU]> {
       bit Simm;
       let cmode = {0b1, 0b0, Simm, 0b0};
      }
@@ -1263,7 +1301,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v2i32 (opnode (v2i32 VPR64:$src),
                       (v2i32 (neonopnode timm:$Imm,
                         neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
     }
@@ -1277,7 +1316,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v4i32 (opnode (v4i32 VPR128:$src),
                       (v4i32 (neonopnode timm:$Imm,
                         neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bits<2> Simm;
       let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
     }
@@ -1292,7 +1332,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v4i16 (opnode (v4i16 VPR64:$src),
                        (v4i16 (neonopnode timm:$Imm,
                           neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bit  Simm;
       let cmode = {0b1, 0b0, Simm, 0b1};
     }
@@ -1306,7 +1347,8 @@ multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
                     (v8i16 (opnode (v8i16 VPR128:$src),
                       (v8i16 (neonopnode timm:$Imm,
                         neon_mov_imm_LSL_operand:$Simm)))))],
-                 NoItinerary> {
+                 NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]> {
       bit Simm;
       let cmode = {0b1, 0b0, Simm, 0b1};
     }
@@ -1325,7 +1367,8 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                               [(set (v2i32 VPR64:$Rd),
                                  (v2i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_MSL_operand:$Simm))))],
-                             NoItinerary> {
+                             NoItinerary>,
+               Sched<[WriteFPALU]> {
        bit Simm;
        let cmode = {0b1, 0b1, 0b0, Simm};
      }
@@ -1338,7 +1381,8 @@ multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
                               [(set (v4i32 VPR128:$Rd),
                                  (v4i32 (opnode (timm:$Imm),
                                    (neon_mov_imm_MSL_operand:$Simm))))],
-                              NoItinerary> {
+                              NoItinerary>,
+              Sched<[WriteFPALU]> {
      bit Simm;
      let cmode = {0b1, 0b1, 0b0, Simm};
    }
@@ -1565,7 +1609,8 @@ def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
                                "movi\t$Rd.8b, $Imm",
                                [(set (v8i8 VPR64:$Rd),
                                   (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
-                                NoItinerary> {
+                                NoItinerary>,
+                Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 
@@ -1574,7 +1619,8 @@ def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
                                 "movi\t$Rd.16b, $Imm",
                                 [(set (v16i8 VPR128:$Rd),
                                    (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
-                                 NoItinerary> {
+                                 NoItinerary>,
+                Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 }
@@ -1586,7 +1632,8 @@ def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
                                "movi\t $Rd.2d, $Imm",
                                [(set (v2i64 VPR128:$Rd),
                                   (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
-                               NoItinerary> {
+                               NoItinerary>,
+                Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 }
@@ -1599,7 +1646,8 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1,
                            "movi\t $Rd, $Imm",
                            [(set (v1i64 FPR64:$Rd),
                              (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
-                           NoItinerary> {
+                           NoItinerary>,
+             Sched<[WriteFPALU]> {
   let cmode = 0b1110;
 }
 }
@@ -1613,7 +1661,8 @@ class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
                    "fmov\t$Rd" # asmlane # ", $Imm",
                    [(set (OpTy VPRC:$Rd),
                       (OpTy (Neon_fmovi (timm:$Imm))))],
-                   NoItinerary> {
+                   NoItinerary>,
+    Sched<[WriteFPALU]> {
      let cmode = 0b1111;
    }
 
@@ -1692,7 +1741,8 @@ class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
                      [(set (Ty VPRC:$Rd),
                         (Ty (OpNode (Ty VPRC:$Rn),
                           (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
   // 64-bit vector types.
@@ -1873,7 +1923,8 @@ class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                         (DestTy (shl
                           (DestTy (ExtOp (SrcTy VPR64:$Rn))),
                             (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                        string SrcT, ValueType DestTy, ValueType SrcTy,
@@ -1887,7 +1938,8 @@ class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                           (DestTy (ExtOp
                             (SrcTy (getTop VPR128:$Rn)))),
                               (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
                          SDNode ExtOp> {
@@ -1988,7 +2040,8 @@ class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                      [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
                         (i32 ImmTy:$Imm))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 // shift right (vector by immediate)
 multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
@@ -2091,7 +2144,8 @@ class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
            [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
               (Ty (OpNode (Ty VPRC:$Rn),
                 (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
-           NoItinerary> {
+           NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2146,7 +2200,8 @@ class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                      [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
                         (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
-                     NoItinerary> {
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2201,7 +2256,8 @@ class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
            asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
            [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
              (i32 ImmTy:$Imm))))],
-           NoItinerary> {
+           NoItinerary>,
+      Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2295,14 +2351,16 @@ class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
   : NeonI_2VShiftImm<q, u, opcode,
                      (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
-                     [], NoItinerary>;
+                     [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
                        string SrcT, Operand ImmTy>
   : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
                      (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
                      asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
-                     [], NoItinerary> {
+                     [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -2461,7 +2519,8 @@ class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
                      asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
                      [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
                        (i32 ImmTy:$Imm))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
                               SDPatternOperator IntOp> {
@@ -2539,28 +2598,32 @@ multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.8b",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v8i8 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                 (outs FPR16:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.16b",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v16i8 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1s4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                 (outs FPR32:$Rd), (ins VPR64:$Rn),
                 asmop # "\t$Rd, $Rn.4h",
                 [(set (v1i32 FPR32:$Rd),
                     (v1i32 (opnode (v4i16 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1s8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                 (outs FPR32:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.8h",
                 [(set (v1i32 FPR32:$Rd),
                     (v1i32 (opnode (v8i16 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     // _1d2s doesn't exist!
 
@@ -2569,7 +2632,8 @@ multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.4s",
                 [(set (v1i64 FPR64:$Rd),
                     (v1i64 (opnode (v4i32 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
@@ -2585,28 +2649,32 @@ multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.8b",
                 [(set (v1i8 FPR8:$Rd),
                     (v1i8 (opnode (v8i8 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
                 (outs FPR8:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.16b",
                 [(set (v1i8 FPR8:$Rd),
                     (v1i8 (opnode (v16i8 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1h4h:  NeonI_2VAcross<0b0, u, 0b01, opcode,
                 (outs FPR16:$Rd), (ins VPR64:$Rn),
                 asmop # "\t$Rd, $Rn.4h",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v4i16 VPR64:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def _1h8h:  NeonI_2VAcross<0b1, u, 0b01, opcode,
                 (outs FPR16:$Rd), (ins VPR128:$Rn),
                 asmop # "\t$Rd, $Rn.8h",
                 [(set (v1i16 FPR16:$Rd),
                     (v1i16 (opnode (v8i16 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     // _1s2s doesn't exist!
 
@@ -2615,7 +2683,8 @@ multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
                 asmop # "\t$Rd, $Rn.4s",
                 [(set (v1i32 FPR32:$Rd),
                     (v1i32 (opnode (v4i32 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
@@ -2635,7 +2704,8 @@ multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
                 asmop # "\t$Rd, $Rn.4s",
                 [(set (f32 FPR32:$Rd),
                     (f32 (opnode (v4f32 VPR128:$Rn))))],
-                NoItinerary>;
+                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
@@ -2658,7 +2728,8 @@ class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
                asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
                [(set (Ty OpVPR:$Rd),
                   (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
-               NoItinerary>;
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
                           SDPatternOperator opnode> {
@@ -2717,7 +2788,8 @@ class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
                         string asmop, SDPatternOperator opnode,
@@ -2792,7 +2864,8 @@ class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (opnode (ResTy VPR128:$Rn),
                                    (ResTy (ext (OpTy OpVPR:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
                         SDPatternOperator opnode> {
@@ -2873,7 +2946,8 @@ class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     (ResTy (get_hi
                       (OpTy (opnode (OpTy VPR128:$Rn),
                                     (OpTy VPR128:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
                                 SDPatternOperator opnode, bit Commutable = 0> {
@@ -2901,7 +2975,8 @@ class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                  [(set (ResTy ResVPR:$Rd),
                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 // normal narrow pattern
 multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
@@ -2925,7 +3000,8 @@ class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
   : NeonI_3VDiff<q, u, size, opcode,
                  (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
-                 [], NoItinerary> {
+                 [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let neverHasSideEffects = 1;
 }
@@ -2990,7 +3066,8 @@ class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
                                                 (OpTy OpVPR:$Rm))))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
                            SDPatternOperator opnode, bit Commutable = 0> {
@@ -3058,7 +3135,8 @@ class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
                       (ResTy VPR128:$src),
                       (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
                                                  (OpTy OpVPR:$Rm))))))))],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -3098,7 +3176,8 @@ defm UABAL2vvv :  NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
 // Long pattern with 2 operands
 multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
                           SDPatternOperator opnode, bit Commutable = 0> {
-  let isCommutable = Commutable in {
+  let isCommutable = Commutable,
+      SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
     def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
                               opnode, VPR128, VPR64, v8i16, v8i8>;
     def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
@@ -3120,7 +3199,8 @@ class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
                  [(set (ResTy VPR128:$Rd),
                     (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
-                 NoItinerary>;
+                 NoItinerary>,
+    Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>;
 
 multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
                                    string opnode, bit Commutable = 0> {
@@ -3154,7 +3234,8 @@ class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
                     (ResTy (opnode
                       (ResTy VPR128:$src),
                       (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -3202,7 +3283,8 @@ class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
                   (ResTy (subop
                     (ResTy VPR128:$src),
                     (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -3254,8 +3336,10 @@ multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
                                 int_arm_neon_vqdmull, 1>;
+}
 
 multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
                                    string opnode, bit Commutable = 0> {
@@ -3299,6 +3383,7 @@ multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in
 defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
                               int_aarch64_neon_vmull_p64, 1>;
 
@@ -3319,7 +3404,8 @@ multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
                           (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
                         (v1i64 (scalar_to_vector
                           (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
-                   NoItinerary>;
+                   NoItinerary>,
+      Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>;
   }
 
   def : Pat<(v16i8 (int_aarch64_neon_vmull_p64
@@ -3355,7 +3441,8 @@ class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
                  (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                  asmop # "\t$Rt, [$Rn]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteVecLd, ReadVecLd]> {
   let mayLoad = 1;
   let neverHasSideEffects = 1;
 }
@@ -3409,7 +3496,8 @@ class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
                  (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
                  asmop # "\t$Rt, [$Rn]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
   let mayStore = 1;
   let neverHasSideEffects = 1;
 }
@@ -3642,7 +3730,8 @@ multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
                      (ins GPR64xsp:$Rn, ImmTy:$amt),
                      asmop # "\t$Rt, [$Rn], $amt",
                      [],
-                     NoItinerary> {
+                     NoItinerary>,
+                 Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> {
       let Rm = 0b11111;
     }
 
@@ -3651,7 +3740,8 @@ multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
                         (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                         asmop # "\t$Rt, [$Rn], $Rm",
                         [],
-                        NoItinerary>;
+                        NoItinerary>,
+                    Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>;
   }
 }
 
@@ -3725,7 +3815,8 @@ multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
                      (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
                      asmop # "\t$Rt, [$Rn], $amt",
                      [],
-                     NoItinerary> {
+                     NoItinerary>,
+                 Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
       let Rm = 0b11111;
     }
 
@@ -3734,7 +3825,8 @@ multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
                       asmop # "\t$Rt, [$Rn], $Rm",
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+                    Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>;
   }
 }
 
@@ -3838,7 +3930,8 @@ class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                       (outs VecList:$Rt), (ins GPR64xsp:$Rn),
                       asmop # "\t$Rt, [$Rn]",
                       [],
-                      NoItinerary> {
+                      NoItinerary>,
+      Sched<[WriteVecLd, ReadVecLd]> {
   let mayLoad = 1;
   let neverHasSideEffects = 1;
 }
@@ -3932,7 +4025,8 @@ class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                          (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
                          asmop # "\t$Rt[$lane], [$Rn]",
                          [],
-                         NoItinerary> {
+                         NoItinerary>,
+      Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> {
   let mayLoad = 1;
   let neverHasSideEffects = 1;
   let hasExtraDefRegAllocReq = 1;
@@ -4017,7 +4111,8 @@ class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
                          (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
                          asmop # "\t$Rt[$lane], [$Rn]",
                          [],
-                         NoItinerary> {
+                         NoItinerary>,
+      Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
   let mayStore = 1;
   let neverHasSideEffects = 1;
   let hasExtraDefRegAllocReq = 1;
@@ -4109,16 +4204,18 @@ multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
                       (ins GPR64xsp:$Rn, ImmTy:$amt),
                       asmop # "\t$Rt, [$Rn], $amt",
                       [],
-                      NoItinerary> {
-                        let Rm = 0b11111;
-                      }
+                      NoItinerary>,
+                 Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> {
+      let Rm = 0b11111;
+    }
 
     def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
                       (outs VecList:$Rt, GPR64xsp:$wb),
                       (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
                       asmop # "\t$Rt, [$Rn], $Rm",
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+                    Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>;
   }
 }
 
@@ -4182,7 +4279,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
                                     VList:$src, ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $amt",
                                 [],
-                                NoItinerary> {
+                                NoItinerary>,
+        Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> {
     let Rm = 0b11111;
   }
 
@@ -4194,7 +4292,8 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
                                     VList:$src, ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+        Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>;
 }
 
 multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
@@ -4282,7 +4381,8 @@ let mayStore = 1, neverHasSideEffects = 1,
                                     VList:$Rt, ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $amt",
                                 [],
-                                NoItinerary> {
+                                NoItinerary>,
+        Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
     let Rm = 0b11111;
   }
 
@@ -4294,7 +4394,8 @@ let mayStore = 1, neverHasSideEffects = 1,
                                     ImmOp:$lane),
                                 asmop # "\t$Rt[$lane], [$Rn], $Rm",
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+        Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>;
 }
 
 multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
@@ -4382,7 +4483,8 @@ class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
                       (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
   : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
@@ -4465,7 +4567,8 @@ class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
                       (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
                       !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                       [],
-                      NoItinerary>;
+                      NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
   def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
@@ -4478,12 +4581,14 @@ multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
                        (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                        [],
-                       NoItinerary>;
+                       NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>;
     def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
                        (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
                        !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
                        [],
-                       NoItinerary>;
+                       NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -4513,7 +4618,8 @@ class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asm
                           (outs FPRCD:$Rd), (ins FPRCS:$Rn),
                           !strconcat(asmop, "\t$Rd, $Rn"),
                           [],
-                          NoItinerary>;
+                          NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
                                          string asmop> {
@@ -4550,7 +4656,8 @@ class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
                           (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
                           !strconcat(asmop, "\t$Rd, $Rn"),
                           [],
-                          NoItinerary>;
+                          NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
                                                  string asmop> {
@@ -4610,7 +4717,8 @@ class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
                           (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
                           !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
                           [],
-                          NoItinerary>;
+                          NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                                               string asmop> {
@@ -4618,12 +4726,14 @@ multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
                            (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm),
                            !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                            [],
-                           NoItinerary>;
+                           NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
   def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
                            (outs FPR64:$Rd), (ins FPR64:$Rn, fpzz32:$FPImm),
                            !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
                            [],
-                           NoItinerary>;
+                           NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
@@ -4707,7 +4817,8 @@ class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
   : NeonI_ScalarShiftImm<u, opcode,
                          (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
                                             string asmop> {
@@ -4772,7 +4883,8 @@ class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop
                          (outs FPR64:$Rd),
                          (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary> {
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     bits<6> Imm;
     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
     let Inst{21-16} = Imm;
@@ -4784,7 +4896,8 @@ class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
                          (outs FPR64:$Rd),
                          (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary> {
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     bits<6> Imm;
     let Inst{22} = 0b1; // immh:immb = 1xxxxxx
     let Inst{21-16} = Imm;
@@ -4797,7 +4910,8 @@ class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
   : NeonI_ScalarShiftImm<u, opcode,
                          (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
                          !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
                                                 string asmop> {
@@ -5111,10 +5225,13 @@ defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
                                            UQSUBhhh, UQSUBsss, UQSUBddd>;
 
 // Scalar Integer Saturating Doubling Multiply Half High
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in
 defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
 
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
+}
 
 // Patterns to match llvm.arm.* intrinsic for
 // Scalar Integer Saturating Doubling Multiply Half High and
@@ -5124,8 +5241,10 @@ defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
 defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
                                                                 SQRDMULHsss>;
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 // Scalar Floating-point Multiply Extended
 defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
+}
 
 // Scalar Floating-point Reciprocal Step
 defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
@@ -5218,18 +5337,24 @@ defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
 defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
 
+let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
 // Signed Saturating Doubling Multiply-Add Long
 defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
+}
 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
                                             SQDMLALshh, SQDMLALdss>;
 
 // Signed Saturating Doubling Multiply-Subtract Long
+let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
 defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
+}
 defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
                                             SQDMLSLshh, SQDMLSLdss>;
 
 // Signed Saturating Doubling Multiply Long
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
+}
 defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
                                          SQDMULLshh, SQDMULLdss>;
 
@@ -5557,7 +5682,8 @@ multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2d"),
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
   }
 }
 
@@ -5569,7 +5695,8 @@ multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
                                 !strconcat(asmop, "\t$Rd, $Rn.2s"),
                                 [],
-                                NoItinerary>;
+                                NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
   }
 }
 
@@ -5642,7 +5769,8 @@ class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
                              (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                              [],
-                             NoItinerary> {
+                             NoItinerary>,
+    Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> {
   bits<3> Imm;
   bits<5> MRm;
 }
@@ -5659,7 +5787,8 @@ class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode
                              (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
                              asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
                              [],
-                             NoItinerary> {
+                             NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   let Constraints = "$src = $Rd";
   bits<3> Imm;
   bits<5> MRm;
@@ -6170,7 +6299,8 @@ class NeonI_Scalar_DUP<string asmop, string asmlane,
   : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
                      asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
                      [],
-                     NoItinerary> {
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -6581,7 +6711,8 @@ class NeonI_Extract<bit q, bits<2> op2, string asmop,
                      asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
                      ", $Rm." # OpS # ", $Index",
                      [],
-                     NoItinerary>{
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{
   bits<4> Index;
 }
 
@@ -6622,7 +6753,8 @@ class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
               (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
               [],
-              NoItinerary>;
+              NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
 // The vectors in look up table are always 16b
 multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
@@ -6646,7 +6778,8 @@ class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
               (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
               asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
               [],
-              NoItinerary> {
+              NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
 }
 
@@ -6674,7 +6807,8 @@ class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
                    (ResTy VPR128:$src),
                    (OpTy OpGPR:$Rn),
                    (OpImm:$Imm))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   bits<4> Imm;
   let Constraints = "$src = $Rd";
 }
@@ -6732,7 +6866,8 @@ class NeonI_INS_element<string asmop, string Res, Operand ResImm>
                  ResImm:$Immd, ResImm:$Immn),
                  asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   bits<4> Immd;
   bits<4> Immn;
@@ -6876,7 +7011,8 @@ class NeonI_SMOV<string asmop, string Res, bit Q,
                    (ResTy (vector_extract
                      (OpTy VPR128:$Rn), (OpImm:$Imm))),
                    eleTy)))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -6970,7 +7106,8 @@ class NeonI_UMOV<string asmop, string Res, bit Q,
                [(set (ResTy ResGPR:$Rd),
                   (ResTy (vector_extract
                     (OpTy VPR128:$Rn), (OpImm:$Imm))))],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -7128,7 +7265,8 @@ class NeonI_DUP_Elt<bit Q, string asmop, string rdlane,  string rnlane,
                (ins VPR128:$Rn, OpImm:$Imm),
                asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
                [],
-               NoItinerary> {
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   bits<4> Imm;
 }
 
@@ -7234,7 +7372,8 @@ class NeonI_DUP<bit Q, string asmop, string rdlane,
                asmop # "\t$Rd" # rdlane # ", $Rn",
                [(set (ResTy ResVPR:$Rd),
                  (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
-               NoItinerary>;
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
   let Inst{20-16} = 0b00001;
@@ -7335,7 +7474,8 @@ class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                  ", $Re." # EleOpS # "[$Index]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
   bits<3> Index;
   bits<5> Re;
 
@@ -7434,7 +7574,8 @@ class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
                  asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
                  ", $Re." # EleOpS # "[$Index]",
                  [],
-                 NoItinerary> {
+                 NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   bits<3> Index;
   bits<5> Re;
 }
@@ -7473,9 +7614,11 @@ multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
 defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
 defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
+}
 
 // Pattern for lane in 128-bit vector
 class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
@@ -7548,8 +7691,10 @@ multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
 defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
+}
 
 class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
                          RegisterOperand OpVPR, RegisterOperand EleOpVPR,
@@ -7857,9 +8002,11 @@ multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
   }
 }
 
+let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
 defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
 defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
 defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
+}
 
 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
           (FMOVdd $src)>;
@@ -8074,7 +8221,8 @@ class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
                asmop # "\t$Rd." # Res # ", $Rn." # Res,
                [(set (ResTy ResVPR:$Rd),
                   (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
-               NoItinerary> ;
+               NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
                           v16i8, Neon_rev64>;
@@ -8113,42 +8261,48 @@ multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
                            asmop # "\t$Rd.8h, $Rn.16b",
                            [(set (v8i16 VPR128:$Rd),
                               (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
-                           NoItinerary>;
+                           NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.4h, $Rn.8b",
                           [(set (v4i16 VPR64:$Rd),
                              (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
-                          NoItinerary>;
+                          NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd.4s, $Rn.8h",
                            [(set (v4i32 VPR128:$Rd),
                               (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
-                           NoItinerary>;
+                           NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.2s, $Rn.4h",
                           [(set (v2i32 VPR64:$Rd),
                              (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
-                          NoItinerary>;
+                          NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$Rn),
                            asmop # "\t$Rd.2d, $Rn.4s",
                            [(set (v2i64 VPR128:$Rd),
                               (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
-                           NoItinerary>;
+                           NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.1d, $Rn.2s",
                           [(set (v1i64 VPR64:$Rd),
                              (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
-                          NoItinerary>;
+                          NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
@@ -8170,7 +8324,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                              [(set (v8i16 VPR128:$Rd),
                                 (v8i16 (Neon_Padd
                                   (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
-                             NoItinerary>;
+                             NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
@@ -8178,7 +8333,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v4i16 VPR64:$Rd),
                                (v4i16 (Neon_Padd
                                  (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
@@ -8186,7 +8342,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v4i32 VPR128:$Rd),
                                (v4i32 (Neon_Padd
                                  (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
@@ -8194,7 +8351,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v2i32 VPR64:$Rd),
                                (v2i32 (Neon_Padd
                                  (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
@@ -8202,7 +8360,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v2i64 VPR128:$Rd),
                                (v2i64 (Neon_Padd
                                  (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                             (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
@@ -8210,7 +8369,8 @@ multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
                             [(set (v1i64 VPR64:$Rd),
                                (v1i64 (Neon_Padd
                                  (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
-                            NoItinerary>;
+                            NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8223,37 +8383,44 @@ multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
   def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.16b, $Rn.16b",
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.8h, $Rn.8h",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.4s, $Rn.4s",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.2d, $Rn.2d",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                          (outs VPR64:$Rd), (ins VPR64:$Rn),
                          asmop # "\t$Rd.8b, $Rn.8b",
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.4h, $Rn.4h",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
@@ -8323,37 +8490,44 @@ multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
     def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                            (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                            asmop # "\t$Rd.16b, $Rn.16b",
-                           [], NoItinerary>;
+                           [], NoItinerary>,
+              Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.8h, $Rn.8h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.4s, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "\t$Rd.2d, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                           asmop # "\t$Rd.8b, $Rn.8b",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                           asmop # "\t$Rd.4h, $Rn.4h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8401,42 +8575,48 @@ multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
                          asmop # "\t$Rd.16b, $Rn.16b",
                          [(set (v16i8 VPR128:$Rd),
                             (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
-                         NoItinerary>;
+                         NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.8h, $Rn.8h",
                         [(set (v8i16 VPR128:$Rd),
                            (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (v4i32 VPR128:$Rd),
                            (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.8b, $Rn.8b",
                         [(set (v8i8 VPR64:$Rd),
                            (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.4h, $Rn.4h",
                         [(set (v4i16 VPR64:$Rd),
                            (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (v2i32 VPR64:$Rd),
                            (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
@@ -8447,12 +8627,14 @@ multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
   def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
                          (outs VPR128:$Rd), (ins VPR128:$Rn),
                          asmop # "\t$Rd.16b, $Rn.16b",
-                         [], NoItinerary>;
+                         [], NoItinerary>,
+            Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.8b, $Rn.8b",
-                        [], NoItinerary>;
+                        [], NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
@@ -8510,21 +8692,24 @@ multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (v4f32 VPR128:$Rd),
                            (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.2d, $Rn.2d",
                         [(set (v2f64 VPR128:$Rd),
                            (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (v2f32 VPR64:$Rd),
                            (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
@@ -8534,33 +8719,39 @@ multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
   def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.8b, $Rn.8h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.4h, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   let Constraints = "$Rd = $src" in {
     def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
                              (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                              asmop # "2\t$Rd.16b, $Rn.8h",
-                             [], NoItinerary>;
+                             [], NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.8h, $Rn.4s",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.4s, $Rn.2d",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8613,37 +8804,43 @@ multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
                             (outs VPR128:$Rd),
                             (ins VPR64:$Rn, uimm_exact8:$Imm),
                             asmop # "\t$Rd.8h, $Rn.8b, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR64:$Rn, uimm_exact16:$Imm),
                             asmop # "\t$Rd.4s, $Rn.4h, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR64:$Rn, uimm_exact32:$Imm),
                             asmop # "\t$Rd.2d, $Rn.2s, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR128:$Rn, uimm_exact8:$Imm),
                             asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+                Sched<[WriteFPALU, ReadFPALU]>;
 
     def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR128:$Rn, uimm_exact16:$Imm),
                             asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
 
     def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
                             (outs VPR128:$Rd),
                             (ins VPR128:$Rn, uimm_exact32:$Imm),
                             asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU]>;
   }
 }
 
@@ -8691,23 +8888,27 @@ multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
   def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.4h, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   let Constraints = "$src = $Rd" in {
     def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.8h, $Rn.4s",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
 
     def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                             (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                             asmop # "2\t$Rd.4s, $Rn.2d",
-                            [], NoItinerary>;
+                            [], NoItinerary>,
+               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
   }
 }
 
@@ -8745,12 +8946,14 @@ multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
   def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR64:$Rd), (ins VPR128:$Rn),
                           asmop # "\t$Rd.2s, $Rn.2d",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
                           asmop # "2\t$Rd.4s, $Rn.2d",
-                          [], NoItinerary> {
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
     let Constraints = "$src = $Rd";
   }
 
@@ -8774,22 +8977,26 @@ multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
   def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.4s, $Rn.4h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR64:$Rn),
                           asmop # "\t$Rd.2d, $Rn.2s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "2\t$Rd.4s, $Rn.8h",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 
   def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
                           (outs VPR128:$Rd), (ins VPR128:$Rn),
                           asmop # "2\t$Rd.2d, $Rn.4s",
-                          [], NoItinerary>;
+                          [], NoItinerary>,
+             Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
@@ -8825,21 +9032,24 @@ multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (ResTy4s VPR128:$Rd),
                            (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
                         (outs VPR128:$Rd), (ins VPR128:$Rn),
                         asmop # "\t$Rd.2d, $Rn.2d",
                         [(set (ResTy2d VPR128:$Rd),
                            (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (ResTy2s VPR64:$Rd),
                            (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
@@ -8894,7 +9104,9 @@ defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
                                     int_arm_neon_vrecpe>;
 defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
                                      int_arm_neon_vrsqrte>;
+let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
 defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
+}
 
 multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                                bits<5> opcode, SDPatternOperator Neon_Op> {
@@ -8903,14 +9115,16 @@ multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
                         asmop # "\t$Rd.4s, $Rn.4s",
                         [(set (v4i32 VPR128:$Rd),
                            (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 
   def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
                         (outs VPR64:$Rd), (ins VPR64:$Rn),
                         asmop # "\t$Rd.2s, $Rn.2s",
                         [(set (v2i32 VPR64:$Rd),
                            (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
-                        NoItinerary>;
+                        NoItinerary>,
+           Sched<[WriteFPALU, ReadFPALU]>;
 }
 
 defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
@@ -8927,7 +9141,8 @@ class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
                      [(set (v16i8 VPR128:$Rd),
                         (v16i8 (opnode (v16i8 VPR128:$src),
                                        (v16i8 VPR128:$Rn))))],
-                     NoItinerary>{
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -8942,7 +9157,8 @@ class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
                      asmop # "\t$Rd.16b, $Rn.16b",
                      [(set (v16i8 VPR128:$Rd),
                         (v16i8 (opnode (v16i8 VPR128:$Rn))))],
-                     NoItinerary>;
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]>;
 
 def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
 def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
@@ -8955,7 +9171,8 @@ class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
                      [(set (v4i32 VPR128:$Rd),
                         (v4i32 (opnode (v4i32 VPR128:$src),
                                        (v4i32 VPR128:$Rn))))],
-                     NoItinerary> {
+                     NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -8970,7 +9187,8 @@ class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
   : NeonI_Crypto_SHA<size, opcode,
                      (outs FPR32:$Rd), (ins FPR32:$Rn),
                      asmop # "\t$Rd, $Rn",
-                     [], NoItinerary> {
+                     [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU]> {
   let Predicates = [HasNEON, HasCrypto];
   let hasSideEffects = 0;
 }
@@ -8990,7 +9208,8 @@ class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
                           (v4i32 (opnode (v4i32 VPR128:$src),
                                          (v4i32 VPR128:$Rn),
                                          (v4i32 VPR128:$Rm))))],
-                       NoItinerary> {
+                       NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -9010,7 +9229,8 @@ class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
                           (v4i32 (opnode (v4i32 FPR128:$src),
                                          (v4i32 FPR128:$Rn),
                                          (v4i32 VPR128:$Rm))))],
-                       NoItinerary> {
+                       NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let Predicates = [HasNEON, HasCrypto];
 }
@@ -9025,7 +9245,8 @@ class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop>
                        (outs FPR128:$Rd),
                        (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
                        asmop # "\t$Rd, $Rn, $Rm.4s",
-                       [], NoItinerary> {
+                       [], NoItinerary>,
+    Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
   let Constraints = "$src = $Rd";
   let hasSideEffects = 0;
   let Predicates = [HasNEON, HasCrypto];
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
index 6fcb1116b6c..ec8450b9c1f 100644
--- a/lib/Target/AArch64/AArch64Schedule.td
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -37,8 +37,16 @@ def ReadDiv : SchedRead;
 // Loads
 def WriteLd : SchedWrite;
 def WritePreLd : SchedWrite;
+def WriteVecLd : SchedWrite;
 def ReadLd : SchedRead;
 def ReadPreLd : SchedRead;
+def ReadVecLd : SchedRead;
+
+// Stores
+def WriteSt : SchedWrite;
+def WriteVecSt : SchedWrite;
+def ReadSt : SchedRead;
+def ReadVecSt : SchedRead;
 
 // Branches
 def WriteBr : SchedWrite;
diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td
index e288a24eb2c..20a14e79228 100644
--- a/lib/Target/AArch64/AArch64ScheduleA53.td
+++ b/lib/Target/AArch64/AArch64ScheduleA53.td
@@ -71,9 +71,18 @@ def : WriteRes<WriteMAC, [A53UnitMAC]> { let Latency = 4; }
 // Div
 def : WriteRes<WriteDiv, [A53UnitDiv]> { let Latency = 4; }
 
-// Load
+// Load - Note: Vector loads take 1-5 cycles to issue. For the WriteVecLd below,
+//        choosing the median of 3 which makes the latency 6. May model this more
+//        carefully in the future.
 def : WriteRes<WriteLd, [A53UnitLdSt]> { let Latency = 4; }
 def : WriteRes<WritePreLd, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteVecLd, [A53UnitLdSt]> { let Latency = 6; }
+
+// Store - Note: Vector stores take 1-3 cycles to issue. For the WriteVecSt
+//         below, choosing the median of 2 which makes the latency 5. May model
+//         this more carefully in the future.
+def : WriteRes<WriteSt, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteVecSt, [A53UnitLdSt]> { let Latency = 5; }
 
 // Branch
 def : WriteRes<WriteBr, [A53UnitB]>;
@@ -114,9 +123,14 @@ def : ReadAdvance<ReadMAC, 0>;
 // No forwarding defined for ReadDiv yet.
 def : ReadAdvance<ReadDiv, 0>;
 
-// No forwarding defined for ReadLd, ReadPreLd yet.
+// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet.
 def : ReadAdvance<ReadLd, 0>;
 def : ReadAdvance<ReadPreLd, 0>;
+def : ReadAdvance<ReadVecLd, 0>;
+
+// No forwarding defined for ReadSt and ReadVecSt yet.
+def : ReadAdvance<ReadSt, 0>;
+def : ReadAdvance<ReadVecSt, 0>;
 
 // No forwarding defined for ReadFPALU yet.
 def : ReadAdvance<ReadFPALU, 0>;
diff --git a/test/CodeGen/AArch64/misched-basic-A53.ll b/test/CodeGen/AArch64/misched-basic-A53.ll
index 0d5534eca54..1555c4868e1 100644
--- a/test/CodeGen/AArch64/misched-basic-A53.ll
+++ b/test/CodeGen/AArch64/misched-basic-A53.ll
@@ -4,13 +4,15 @@
 ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
 ; much higher than the ADD instructions in order to hide latency. When not
 ; specifying a subtarget, the MADD will remain near the end of the block.
+;
+; CHECK: ********** MI Scheduling **********
 ; CHECK: main
 ; CHECK: *** Final schedule for BB#2 ***
 ; CHECK: SU(13)
 ; CHECK: MADDwwww
 ; CHECK: SU(4)
 ; CHECK: ADDwwi_lsl0_s
-; CHECK: ********** MI Scheduling **********
+; CHECK: ********** INTERVALS **********
 @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
 @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
 
@@ -76,6 +78,33 @@ for.end:                                          ; preds = %for.cond
   ret i32 %add6
 }
 
+
+; The Cortex-A53 machine model will cause the FDIVvvv_4S to be raised to
+; hide latency. Whereas normally there would only be a single FADDvvv_4S
+; after it, this test checks to make sure there is more than one.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: neon4xfloat:BB#0
+; CHECK: *** Final schedule for BB#0 ***
+; CHECK: FDIVvvv_4S
+; CHECK: FADDvvv_4S
+; CHECK: FADDvvv_4S
+; CHECK: ********** INTERVALS **********
+define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
+        %tmp1 = fadd <4 x float> %A, %B;
+        %tmp2 = fadd <4 x float> %A, %tmp1;
+        %tmp3 = fadd <4 x float> %A, %tmp2;
+        %tmp4 = fadd <4 x float> %A, %tmp3;
+        %tmp5 = fadd <4 x float> %A, %tmp4;
+        %tmp6 = fadd <4 x float> %A, %tmp5;
+        %tmp7 = fadd <4 x float> %A, %tmp6;
+        %tmp8 = fadd <4 x float> %A, %tmp7;
+        %tmp9 = fdiv <4 x float> %A, %B;
+        %tmp10 = fadd <4 x float> %tmp8, %tmp9;
+
+        ret <4 x float> %tmp10
+}
+
 ; Function Attrs: nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
 
-- 
2.34.1