[ARM64-BE] Make big endian (scalar) argument passing work correctly.

author James Molloy <james.molloy@arm.com>

Wed, 7 May 2014 11:28:36 +0000 (11:28 +0000)

committer James Molloy <james.molloy@arm.com>

Wed, 7 May 2014 11:28:36 +0000 (11:28 +0000)
author James Molloy <james.molloy@arm.com>
Wed, 7 May 2014 11:28:36 +0000 (11:28 +0000)
committer James Molloy <james.molloy@arm.com>
Wed, 7 May 2014 11:28:36 +0000 (11:28 +0000)
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp

index 19c76c6431419c75c65e882bc7e421a050cd4382..0f2efb35f334d0ae6ebb8a768f8f8abe1acda3d4 100644 (file)
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -1678,8 +1678,10 @@ SDValue ARM64TargetLowering::LowerFormalArguments(
        int Size = Ins[i].Flags.getByValSize();
        unsigned NumRegs = (Size + 7) / 8;
  
+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should also work for fundamental types too.
        unsigned FrameIdx =
-          MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+        MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
        SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
        InVals.push_back(FrameIdxN);
  
@@ -1737,13 +1739,33 @@ SDValue ARM64TargetLowering::LowerFormalArguments(
        assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
        unsigned ArgOffset = VA.getLocMemOffset();
        unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
-      int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+
+      uint32_t BEAlign = 0;
+      if (ArgSize < 8 && !Subtarget->isLittleEndian())
+        BEAlign = 8 - ArgSize;
+
+      int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
  
        // Create load nodes to retrieve arguments from the stack.
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
-                                   MachinePointerInfo::getFixedStack(FI), false,
-                                   false, false, 0));
+      SDValue ArgValue;
+
+      // If the loc type and val type are not the same, create an anyext load.
+      if (VA.getLocVT().getSizeInBits() != VA.getValVT().getSizeInBits()) {
+        // We should only get here if this is a pure integer.
+        assert(!VA.getValVT().isVector() && VA.getValVT().isInteger() &&
+               "Only integer extension supported!");
+        ArgValue = DAG.getExtLoad(ISD::EXTLOAD, DL, VA.getValVT(), Chain, FIN,
+                                  MachinePointerInfo::getFixedStack(FI),
+                                  VA.getLocVT(),
+                                  false, false, false, 0);
+      } else {
+        ArgValue = DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
+                               MachinePointerInfo::getFixedStack(FI), false,
+                               false, false, 0);
+      }
+
+      InVals.push_back(ArgValue);
      }
    }
  
@@ -2089,8 +2111,18 @@ SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI,
        // There's no reason we can't support stack args w/ tailcall, but
        // we currently don't, so assert if we see one.
        assert(!IsTailCall && "stack argument with tail call!?");
+
+      // FIXME: This works on big-endian for composite byvals, which are the common
+      // case. It should also work for fundamental types too.
+      uint32_t BEAlign = 0;
+      if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+        unsigned OpSize = (VA.getLocVT().getSizeInBits() + 7) / 8;
+        if (OpSize < 8)
+          BEAlign = 8 - OpSize;
+      }
+
        unsigned LocMemOffset = VA.getLocMemOffset();
-      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset + BEAlign);
        PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
  
        if (Outs[i].Flags.isByVal()) {
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll

index 01922ede228963fa87b2b49b93eff641f5efe95b..8742e450897c7b4ddf66fbbaa035dca9c41f9638 100644 (file)
--- a/test/CodeGen/AArch64/adc.ll
+++ b/test/CodeGen/AArch64/adc.ll
@@ -1,6 +1,7 @@
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
  
  define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
  ; CHECK-LABEL: test_simple:
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll

index e2109e658f7f15ff8f7c429ebacbe144cfc88eb9..5b3e6c89db6e4aa90ee0446c291f5a9bd4b2c581 100644 (file)
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -1,9 +1,12 @@
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-AARCH64 --check-prefix=CHECK-LE %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE-AARCH64 --check-prefix=CHECK-BE %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+
  ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64 %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE --check-prefix=CHECK-ARM64-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
  
  %myStruct = type { i64 , i8, i32 }
  
@@ -152,7 +155,7 @@ define i32 @struct_on_stack(i8 %var0, i16 %var1, i32 %var2, i64 %var3, i128 %var
      %retval = load volatile i32* %stacked
      ret i32 %retval
  ; CHECK-LE: ldr w0, [sp, #16]
-; CHECK-BE: ldr w0, [sp, #20]
+; CHECK-BE-AARCH64: ldr w0, [sp, #20]
  }
  
  define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
@@ -162,8 +165,10 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
      store float %var8, float* @varfloat
      ; Beware as above: the offset would be different on big-endian
      ; machines if the first ldr were changed to use s-registers.
-; CHECK: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
-; CHECK: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-ARM64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-AARCH64: ldr {{[ds]}}[[VALFLOAT:[0-9]+]], [sp]
+; CHECK-ARM64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
+; CHECK-AARCH64: str s[[VALFLOAT]], [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
  
      ret void
  }
@@ -188,7 +193,7 @@ define void @check_i128_stackalign(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
      ; Nothing local on stack in current codegen, so first stack is 16 away
  ; CHECK-LE: add     x[[REG:[0-9]+]], sp, #16
  ; CHECK-LE: ldr {{x[0-9]+}}, [x[[REG]], #8]
-; CHECK-BE: ldr {{x[0-9]+}}, [sp, #24]
+; CHECK-BE-AARCH64: ldr {{x[0-9]+}}, [sp, #24]
  
      ; Important point is that we address sp+24 for second dword
  ; CHECK-AARCH64: ldr     {{x[0-9]+}}, [sp, #16]
@@ -205,3 +210,14 @@ define i32 @test_extern() {
  ; CHECK: bl memcpy
    ret i32 0
  }
+
+
+; A sub-i32 stack argument must be loaded on big endian with ldr{h,b}, not just
+; implicitly extended to a 32-bit load.
+define i16 @stacked_i16(i32 %val0, i32 %val1, i32 %val2, i32 %val3,
+                        i32 %val4, i32 %val5, i32 %val6, i32 %val7,
+                        i16 %stack1) {
+; CHECK-LABEL: stacked_i16
+; CHECK-ARM64-BE: ldrh
+  ret i16 %stack1
+}
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll

index 26c705700a610544984abe3aead52d4bdc03cf4e..d216e3239d59f3cb35bce65f33ce70ea4e40c334 100644 (file)
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -2,9 +2,11 @@
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-BE --check-prefix=CHECK-NOFP %s
+
  ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
  ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK --check-prefix=CHECK-ARM64-NONEON %s
  ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
  
  %myStruct = type { i64 , i8, i32 }
  
@@ -149,9 +151,9 @@ define void @check_i128_align() {
  
    call void @check_i128_regalign(i32 0, i128 42)
  ; CHECK-NOT: mov x1
-; CHECK-LE: movz x2, #42
+; CHECK-LE: movz x2, #{{0x2a|42}}
  ; CHECK-LE: mov x3, xzr
-; CHECK-BE: movz x3, #42
+; CHECK-BE: movz {{x|w}}3, #{{0x2a|42}}
  ; CHECK-BE: mov x2, xzr
  ; CHECK: bl check_i128_regalign
  
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll

index e9493efe8fd075aeab0bca52959a8df59f67b8b7..3b027f2d4f1073ca409740be7cbe45c55f256b17 100644 (file)
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,6 +1,7 @@
  ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
  ; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
  ; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
  
  define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
  ; CHECK-LABEL: test_128bitmul:
diff --git a/test/CodeGen/ARM64/aapcs.ll b/test/CodeGen/ARM64/aapcs.ll

index bd206a48b732e069c8be2146845ea4979c5e11e7..b713f0d5a5315ed31af238171b2747b47fa4e8a8 100644 (file)
--- a/test/CodeGen/ARM64/aapcs.ll
+++ b/test/CodeGen/ARM64/aapcs.ll
@@ -21,7 +21,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
  
    %ext_bool = zext i1 %bool to i64
    store volatile i64 %ext_bool, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp]
+; CHECK: ldrb w[[EXT:[0-9]+]], [sp]
  ; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1
  ; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64]
  
@@ -37,7 +37,7 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short,
  
    %ext_int = zext i32 %int to i64
    store volatile i64 %ext_int, i64* @var64, align 8
-; CHECK: ldr w[[EXT:[0-9]+]], [sp, #24]
+; CHECK: ldr{{b?}} w[[EXT:[0-9]+]], [sp, #24]
  ; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64]
  
    store volatile i64 %long, i64* @var64, align 8
author	James Molloy <james.molloy@arm.com>
	Wed, 7 May 2014 11:28:36 +0000 (11:28 +0000)
committer	James Molloy <james.molloy@arm.com>
	Wed, 7 May 2014 11:28:36 +0000 (11:28 +0000)
lib/Target/ARM64/ARM64ISelLowering.cpp		patch | blob | history
test/CodeGen/AArch64/adc.ll		patch | blob | history
test/CodeGen/AArch64/func-argpassing.ll		patch | blob | history
test/CodeGen/AArch64/func-calls.ll		patch | blob | history
test/CodeGen/AArch64/mul-lohi.ll		patch | blob | history
test/CodeGen/ARM64/aapcs.ll		patch | blob | history