From d168cef6647c3a02e5b22d8d651e344af6b96fe2 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Wed, 3 Nov 2010 16:24:53 +0000 Subject: [PATCH] Add codegen patterns for VST1-lane instructions. Radar 8599955. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118176 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 25 ++++++++++----- test/CodeGen/ARM/vget_lane.ll | 12 ++++--- test/CodeGen/ARM/vldlane.ll | 2 +- test/CodeGen/ARM/vstlane.ll | 57 ++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 13 deletions(-) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index a913664a1fe..d4af01cfe16 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1126,28 +1126,37 @@ class VSTQQQQLNWBPseudo nohash_imm:$lane), itin, "$addr.addr = $wb">; // VST1LN : Vector Store (single element from one lane) -class VST1LN op11_8, bits<4> op7_4, string Dt> +class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag StoreOp, SDNode ExtractOp> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), - IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", []> { + IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> { let Rm = 0b1111; } +class VST1QLNPseudo + : VSTQLNPseudo { + let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), + addrmode6:$addr)]; +} -def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8"> { +def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8, + NEONvgetlaneu> { let Inst{7-5} = lane{2-0}; } -def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16"> { +def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16, + NEONvgetlaneu> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32"> { +def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> { let Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } -def VST1LNq8Pseudo : VSTQLNPseudo; -def VST1LNq16Pseudo : VSTQLNPseudo; -def VST1LNq32Pseudo : VSTQLNPseudo; +def VST1LNq8Pseudo : VST1QLNPseudo; +def VST1LNq16Pseudo : VST1QLNPseudo; +def VST1LNq32Pseudo : VST1QLNPseudo; let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { diff --git a/test/CodeGen/ARM/vget_lane.ll b/test/CodeGen/ARM/vget_lane.ll index 512dc5a67fb..1fc885d6137 100644 --- a/test/CodeGen/ARM/vget_lane.ll +++ b/test/CodeGen/ARM/vget_lane.ll @@ -102,7 +102,8 @@ entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1] %1 = extractelement <4 x i16> %0, i32 1 ; [#uses=1] - store i16 %1, i16* %out_uint16_t, align 2 + %2 = add i16 %1, %1 + store i16 %2, i16* %out_uint16_t, align 2 br label %return return: ; preds = %entry @@ -117,7 +118,8 @@ entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1] %1 = extractelement <8 x i8> %0, i32 1 ; [#uses=1] - store i8 %1, i8* %out_uint8_t, align 1 + %2 = add i8 %1, %1 + store i8 %2, i8* %out_uint8_t, align 1 br label %return return: ; preds = %entry @@ -132,7 +134,8 @@ entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1] %1 = extractelement <8 x i16> %0, i32 1 ; [#uses=1] - store i16 %1, i16* %out_uint16_t, align 2 + %2 = add i16 %1, %1 + store i16 %2, i16* %out_uint16_t, align 2 br label %return return: ; preds = %entry @@ -147,7 +150,8 @@ entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1] %1 = extractelement <16 x i8> %0, i32 1 ; [#uses=1] - store i8 %1, i8* %out_uint8_t, align 1 + %2 = add i8 %1, %1 + store i8 %2, i8* %out_uint8_t, align 1 br label %return return: ; preds = %entry diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 97ab399043a..fc14e24d867 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -22,7 +22,7 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { ;CHECK: vld1lanei32: -;Check the alignment value. Max for this instruction is 16 bits: +;Check the alignment value. Max for this instruction is 32 bits: ;CHECK: vld1.32 {d16[1]}, [r0, :32] %tmp1 = load <2 x i32>* %B %tmp2 = load i32* %A, align 8 diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index a48d6a0c1dc..c5387e04bdf 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -1,5 +1,62 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { +;CHECK: vst1lanei8: +;Check the (default) alignment. +;CHECK: vst1.8 {d16[3]}, [r0] + %tmp1 = load <8 x i8>* %B + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 + ret void +} + +define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { +;CHECK: vst1lanei16: +;Check the alignment value. Max for this instruction is 16 bits: +;CHECK: vst1.16 {d16[2]}, [r0, :16] + %tmp1 = load <4 x i16>* %B + %tmp2 = extractelement <4 x i16> %tmp1, i32 2 + store i16 %tmp2, i16* %A, align 8 + ret void +} + +define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { +;CHECK: vst1lanei32: +;Check the alignment value. Max for this instruction is 32 bits: +;CHECK: vst1.32 {d16[1]}, [r0, :32] + %tmp1 = load <2 x i32>* %B + %tmp2 = extractelement <2 x i32> %tmp1, i32 1 + store i32 %tmp2, i32* %A, align 8 + ret void +} + +define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { +;CHECK: vst1laneQi8: +;CHECK: vst1.8 {d17[1]}, [r0] + %tmp1 = load <16 x i8>* %B + %tmp2 = extractelement <16 x i8> %tmp1, i32 9 + store i8 %tmp2, i8* %A, align 8 + ret void +} + +define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { +;CHECK: vst1laneQi16: +;CHECK: vst1.16 {d17[1]}, [r0, :16] + %tmp1 = load <8 x i16>* %B + %tmp2 = extractelement <8 x i16> %tmp1, i32 5 + store i16 %tmp2, i16* %A, align 8 + ret void +} + +define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { +;CHECK: vst1laneQi32: +;CHECK: vst1.32 {d17[1]}, [r0, :32] + %tmp1 = load <4 x i32>* %B + %tmp2 = extractelement <4 x i32> %tmp1, i32 3 + store i32 %tmp2, i32* %A, align 8 + ret void +} + define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst2lanei8: ;Check the alignment value. Max for this instruction is 16 bits: -- 2.34.1