[FastISel][AArch64] Fix address simplification.
author Juergen Ributzka <juergen@apple.com>
Wed, 27 Aug 2014 00:58:30 +0000 (00:58 +0000)
committer Juergen Ributzka <juergen@apple.com>
Wed, 27 Aug 2014 00:58:30 +0000 (00:58 +0000)
When a shift with extension, or an add with shift and extension, cannot be
folded into the memory operation, the address calculation has to be
materialized separately. While doing so, the code forgot to consider a
possible sign-/zero-extension. This fix now also folds the sign-/zero-extension
into the add or shift instruction that is used to materialize the address.
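
For illustration, a minimal sketch of the affected pattern (mirroring the new
test cases below; the function and value names are hypothetical): the
sign-extension feeding the scaled offset is now folded into the add that
materializes the address, so FastISel can emit e.g. "add x8, x1, w0, sxtw #3"
followed by a plain "ldr x0, [x8, #8]".

  define i64 @addr_sext_shift_imm(i32 %a, i64 %b) {
    %idx  = sext i32 %a to i64      ; sign-extension to fold
    %off  = shl i64 %idx, 3         ; scaled register offset
    %sum  = add i64 %b, %off        ; base register + scaled offset
    %addr = add i64 %sum, 8         ; immediate offset forces materialization
    %ptr  = inttoptr i64 %addr to i64*
    %val  = load i64* %ptr
    ret i64 %val
  }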

This fixes rdar://problem/18141718.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216511 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AArch64/AArch64FastISel.cpp
test/CodeGen/AArch64/fast-isel-addressing-modes.ll

index 4811691732ed788636caaf74aaf95db9aca2d382..bf58d0f6e2553c416fc27a813f855a9dd0faaf31 100644 (file)
@@ -156,10 +156,19 @@ private:
   unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm,
                            bool WantResult = true);
+  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ShiftType,
+                         uint64_t ShiftImm, bool WantResult = true);
   unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ShiftType,
                            uint64_t ShiftImm, bool WantResult = true);
+  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ExtType,
+                         uint64_t ShiftImm, bool WantResult = true);
+
   unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ExtType,
@@ -715,20 +724,38 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
 
   if (RegisterOffsetNeedsLowering) {
     unsigned ResultReg = 0;
-    if (Addr.getReg())
-      ResultReg = FastEmitInst_rri(AArch64::ADDXrs, &AArch64::GPR64RegClass,
-                                   Addr.getReg(), /*TODO:IsKill=*/false,
-                                   Addr.getOffsetReg(), /*TODO:IsKill=*/false,
-                                   Addr.getShift());
-    else
-      ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
-                             /*Op0IsKill=*/false, Addr.getShift());
+    if (Addr.getReg()) {
+      if (Addr.getExtendType() == AArch64_AM::SXTW ||
+          Addr.getExtendType() == AArch64_AM::UXTW)
+        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
+                                  Addr.getShift());
+      else
+        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
+                                  Addr.getShift());
+    } else {
+      if (Addr.getExtendType() == AArch64_AM::UXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/true);
+      else if (Addr.getExtendType() == AArch64_AM::SXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/false);
+      else
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift());
+    }
     if (!ResultReg)
       return false;
 
     Addr.setReg(ResultReg);
     Addr.setOffsetReg(0);
     Addr.setShift(0);
+    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
   }
 
   // Since the offset is too large for the load/store instruction get the
@@ -978,6 +1005,40 @@ unsigned AArch64FastISel::emitAddsSubs_ri(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
 
+unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ShiftType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrs, AArch64::ADDXrs },
+    { AArch64::SUBWrs, AArch64::SUBXrs }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getShifterImm(ShiftType, ShiftImm));
+
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
@@ -1012,6 +1073,40 @@ unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
 
+unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ExtType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrx, AArch64::ADDXrx },
+    { AArch64::SUBWrx, AArch64::SUBXrx }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getArithExtendImm(ExtType, ShiftImm));
+
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
index b950a24f558366b16883696dd2f50b4004edbc93..222c9605f83e46a5bdc2fa7b58976fa246dc2cf2 100644 (file)
@@ -423,3 +423,30 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
   ret i32 %5
 }
 
+; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_sext_shift_offreg_imm1(i32 %a) {
+; CHECK-LABEL: load_sext_shift_offreg_imm1
+; CHECK:       sbfiz [[REG:x[0-9]+]], x0, #3, #32
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %2, 8
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Load Base Register + Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_breg_sext_shift_offreg_imm1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_imm1
+; CHECK:       add [[REG:x[0-9]+]], x1, w0, sxtw #3
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = add i64 %3, 8
+  %5 = inttoptr i64 %4 to i64*
+  %6 = load i64* %5
+  ret i64 %6
+}
+