From fc03e72b4f30e7ccb51f3cfb6faada689b333750 Mon Sep 17 00:00:00 2001
From: Juergen Ributzka
Date: Wed, 27 Aug 2014 00:58:30 +0000
Subject: [PATCH] [FastISel][AArch64] Fix address simplification.

When a shift with extension or an add with shift and extension cannot be
folded into the memory operation, the address calculation has to be
materialized separately. While doing so, the code forgot to consider a
possible sign-/zero-extension. This fix now also folds the
sign-/zero-extension into the add or shift instruction that is used to
materialize the address.

This fixes rdar://problem/18141718.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216511 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64FastISel.cpp        | 111 ++++++++++++++++--
 .../AArch64/fast-isel-addressing-modes.ll     |  27 +++++
 2 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 4811691732e..bf58d0f6e25 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -156,10 +156,19 @@ private:
   unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm,
                            bool WantResult = true);
+  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ShiftType,
+                         uint64_t ShiftImm, bool WantResult = true);
   unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ShiftType,
                            uint64_t ShiftImm, bool WantResult = true);
+  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ExtType,
+                         uint64_t ShiftImm, bool WantResult = true);
+
   unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ExtType,
@@ -715,20 +724,38 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 
   if (RegisterOffsetNeedsLowering) {
     unsigned ResultReg = 0;
-    if (Addr.getReg())
-      ResultReg = FastEmitInst_rri(AArch64::ADDXrs, &AArch64::GPR64RegClass,
-                                   Addr.getReg(), /*TODO:IsKill=*/false,
-                                   Addr.getOffsetReg(), /*TODO:IsKill=*/false,
-                                   Addr.getShift());
-    else
-      ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
-                             /*Op0IsKill=*/false, Addr.getShift());
+    if (Addr.getReg()) {
+      if (Addr.getExtendType() == AArch64_AM::SXTW ||
+          Addr.getExtendType() == AArch64_AM::UXTW )
+        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
+                                  Addr.getShift());
+      else
+        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
+                                  Addr.getShift());
+    } else {
+      if (Addr.getExtendType() == AArch64_AM::UXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/true);
+      else if (Addr.getExtendType() == AArch64_AM::SXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/false);
+      else
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift());
+    }
     if (!ResultReg)
       return false;
 
     Addr.setReg(ResultReg);
     Addr.setOffsetReg(0);
     Addr.setShift(0);
+    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
   }
 
   // Since the offset is too large for the load/store instruction get the
@@ -978,6 +1005,40 @@ unsigned AArch64FastISel::emitAddsSubs_ri(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
 
+unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ShiftType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrs, AArch64::ADDXrs },
+    { AArch64::SUBWrs, AArch64::SUBXrs }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getShifterImm(ShiftType, ShiftImm));
+
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
@@ -1012,6 +1073,40 @@ unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
 
+unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ExtType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrx, AArch64::ADDXrx },
+    { AArch64::SUBWrx, AArch64::SUBXrx }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getArithExtendImm(ExtType, ShiftImm));
+
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
diff --git a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
index b950a24f558..222c9605f83 100644
--- a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -423,3 +423,30 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
   ret i32 %5
 }
 
+; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_sext_shift_offreg_imm1(i32 %a) {
+; CHECK-LABEL: load_sext_shift_offreg_imm1
+; CHECK:       sbfiz [[REG:x[0-9]+]], x0, #3, #32
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %2, 8
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Load Base Register + Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_breg_sext_shift_offreg_imm1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_imm1
+; CHECK:       add [[REG:x[0-9]+]], x1, w0, sxtw #3
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = add i64 %3, 8
+  %5 = inttoptr i64 %4 to i64*
+  %6 = load i64* %5
+  ret i64 %6
+}
+
-- 
2.34.1
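
For illustration only (not part of the patch): the new emitAddSub_rx path also covers the zero-extension (UXTW) case, which the two added tests do not exercise. A hypothetical test in the same style as the ones above — the function name and CHECK lines are assumptions about the expected FastISel output, mirroring load_breg_sext_shift_offreg_imm1 — would be expected to look roughly like this:

; Load Base Register + Scaled Register Offset + Immediate Offset + Zero extension
; (illustrative sketch, not taken from the test file)
define i64 @load_breg_zext_shift_offreg_imm1(i32 %a, i64 %b) {
; CHECK-LABEL: load_breg_zext_shift_offreg_imm1
; CHECK:       add [[REG:x[0-9]+]], x1, w0, uxtw #3
; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
  %1 = zext i32 %a to i64
  %2 = shl i64 %1, 3
  %3 = add i64 %b, %2
  %4 = add i64 %3, 8
  %5 = inttoptr i64 %4 to i64*
  %6 = load i64* %5
  ret i64 %6
}

Here the address is %b + (zext(%a) << 3) + 8; since the immediate offset cannot be encoded together with an extended register offset in a single ldr, SimplifyAddress materializes the register part separately, and with this patch the uxtw and the shift are folded into that add.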