From 22f4488b9f4e9de6e37850c0730b41fe46adef5c Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Mon, 26 Oct 2015 21:32:53 +0000 Subject: [PATCH] ARM: make sure VFP loads and stores are properly aligned. Both VLDRS and VLDRD fault if the memory is not 4 byte aligned, which wasn't really being checked before, leading to faults at runtime. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrVFP.td | 22 +++-- test/CodeGen/ARM/unaligned_load_store_vfp.ll | 98 ++++++++++++++++++++ 2 files changed, 110 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/ARM/unaligned_load_store_vfp.ll diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index eef5634d821..2aea73a6336 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -92,7 +92,7 @@ def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), IIC_fpLoad32, "vldr", "\t$Sd, $addr", - [(set SPR:$Sd, (load addrmode5:$addr))]> { + [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; @@ -106,7 +106,7 @@ def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", "\t$Sd, $addr", - [(store SPR:$Sd, addrmode5:$addr)]> { + [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. 
let D = VFPNeonDomain; @@ -1018,7 +1018,7 @@ let Predicates=[HasVFP2, HasDPVFP] in { def : VFPPat<(f64 (sint_to_fp GPR:$a)), (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; - def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))), + def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VSITOD (VLDRS addrmode5:$a))>; } @@ -1036,7 +1036,7 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))), +def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VSITOS (VLDRS addrmode5:$a))>; def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, @@ -1050,7 +1050,7 @@ let Predicates=[HasVFP2, HasDPVFP] in { def : VFPPat<(f64 (uint_to_fp GPR:$a)), (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>; - def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))), + def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VUITOD (VLDRS addrmode5:$a))>; } @@ -1068,7 +1068,7 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))), +def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VUITOS (VLDRS addrmode5:$a))>; // FP -> Int: @@ -1121,7 +1121,7 @@ let Predicates=[HasVFP2, HasDPVFP] in { def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))), (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>; - def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), + def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>; } @@ -1139,7 +1139,8 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)), (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>; -def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), 
addrmode5:$ptr), +def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), + addrmode5:$ptr), (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, @@ -1153,7 +1154,7 @@ let Predicates=[HasVFP2, HasDPVFP] in { def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))), (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>; - def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), + def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>; } @@ -1171,7 +1172,8 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)), (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>; -def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), +def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), + addrmode5:$ptr), (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. 
diff --git a/test/CodeGen/ARM/unaligned_load_store_vfp.ll b/test/CodeGen/ARM/unaligned_load_store_vfp.ll
new file mode 100644
index 00000000000..90d17e19c28
--- /dev/null
+++ b/test/CodeGen/ARM/unaligned_load_store_vfp.ll
@@ -0,0 +1,98 @@
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s
+
+define float @test_load_s32_float(i32* %addr) {
+; CHECK-LABEL: test_load_s32_float:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f32.s32 s0, [[RES_INT]]
+
+  %val = load i32, i32* %addr, align 1
+  %res = sitofp i32 %val to float
+  ret float %res
+}
+
+define double @test_load_s32_double(i32* %addr) {
+; CHECK-LABEL: test_load_s32_double:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f64.s32 d0, [[RES_INT]]
+
+  %val = load i32, i32* %addr, align 1
+  %res = sitofp i32 %val to double
+  ret double %res
+}
+
+define float @test_load_u32_float(i32* %addr) {
+; CHECK-LABEL: test_load_u32_float:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f32.u32 s0, [[RES_INT]]
+
+  %val = load i32, i32* %addr, align 1
+  %res = uitofp i32 %val to float
+  ret float %res
+}
+
+define double @test_load_u32_double(i32* %addr) {
+; CHECK-LABEL: test_load_u32_double:
+; CHECK: ldr [[TMP:r[0-9]+]], [r0]
+; CHECK: vmov [[RES_INT:s[0-9]+]], [[TMP]]
+; CHECK: vcvt.f64.u32 d0, [[RES_INT]]
+
+  %val = load i32, i32* %addr, align 1
+  %res = uitofp i32 %val to double
+  ret double %res
+}
+
+define void @test_store_f32(float %in, float* %addr) {
+; CHECK-LABEL: test_store_f32:
+; CHECK: vmov [[TMP:r[0-9]+]], s0
+; CHECK: str [[TMP]], [r0]
+
+  store float %in, float* %addr, align 1
+  ret void
+}
+
+define void @test_store_float_s32(float %in, i32* %addr) {
+; CHECK-LABEL: test_store_float_s32:
+; CHECK: vcvt.s32.f32 [[TMP:s[0-9]+]], s0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+  %val = fptosi float %in to i32
+  store i32 %val, i32* %addr, align 1
+  ret void
+}
+
+define void @test_store_double_s32(double %in, i32* %addr) {
+; CHECK-LABEL: test_store_double_s32:
+; CHECK: vcvt.s32.f64 [[TMP:s[0-9]+]], d0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+  %val = fptosi double %in to i32
+  store i32 %val, i32* %addr, align 1
+  ret void
+}
+
+define void @test_store_float_u32(float %in, i32* %addr) {
+; CHECK-LABEL: test_store_float_u32:
+; CHECK: vcvt.u32.f32 [[TMP:s[0-9]+]], s0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+  %val = fptoui float %in to i32
+  store i32 %val, i32* %addr, align 1
+  ret void
+}
+
+define void @test_store_double_u32(double %in, i32* %addr) {
+; CHECK-LABEL: test_store_double_u32:
+; CHECK: vcvt.u32.f64 [[TMP:s[0-9]+]], d0
+; CHECK: vmov [[TMP_INT:r[0-9]+]], [[TMP]]
+; CHECK: str [[TMP_INT]], [r0]
+
+  %val = fptoui double %in to i32
+  store i32 %val, i32* %addr, align 1
+  ret void
+}
-- 
2.34.1