From e8bc8a7d585d848b2ecf25ec979c24c5a2fac78b Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 15 Apr 2014 14:00:03 +0000 Subject: [PATCH] AArch64/ARM64: add half as a storage type on ARM64. This brings it into line with the AArch64 behaviour and should open the way for certain OpenCL features. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206286 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM64/ARM64ISelLowering.cpp | 4 ++++ lib/Target/ARM64/ARM64InstrFormats.td | 16 ++++++++-------- lib/Target/ARM64/ARM64InstrInfo.td | 12 ++++++------ lib/Target/ARM64/ARM64RegisterInfo.td | 2 +- test/CodeGen/AArch64/floatdp_1source.ll | 1 + 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp index 205cea669e3..cfb9d1e2da3 100644 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ b/lib/Target/ARM64/ARM64ISelLowering.cpp @@ -84,6 +84,7 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) // Set up the register classes. addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass); addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass); + addRegisterClass(MVT::f16, &ARM64::FPR16RegClass); addRegisterClass(MVT::f32, &ARM64::FPR32RegClass); addRegisterClass(MVT::f64, &ARM64::FPR64RegClass); addRegisterClass(MVT::f128, &ARM64::FPR128RegClass); @@ -370,10 +371,13 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f128, MVT::f80, Expand); setTruncStoreAction(MVT::f128, MVT::f64, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); // Indexed loads and stores are supported. for (unsigned im = (unsigned)ISD::PRE_INC; diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td index 1e3b3bc31ce..76fe443e2f5 100644 --- a/lib/Target/ARM64/ARM64InstrFormats.td +++ b/lib/Target/ARM64/ARM64InstrFormats.td @@ -3396,28 +3396,28 @@ class BaseFPConversion type, bits<2> opcode, RegisterClass dstType, multiclass FPConversion { // Double-precision to Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, []>; + def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, + [(set FPR16:$Rd, (fround FPR64:$Rn))]>; // Double-precision to Single-precision def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, [(set FPR32:$Rd, (fround FPR64:$Rn))]>; // Half-precision to Double-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, []>; + def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, + [(set FPR64:$Rd, (fextend FPR16:$Rn))]>; // Half-precision to Single-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, []>; + def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, + [(set FPR32:$Rd, (fextend FPR16:$Rn))]>; // Single-precision to Double-precision def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, [(set FPR64:$Rd, (fextend FPR32:$Rn))]>; // Single-precision to Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, []>; + def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, + [(set FPR16:$Rd, (fround FPR32:$Rn))]>; } //--- diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td index 154fdd11568..29783409363 100644 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ b/lib/Target/ARM64/ARM64InstrInfo.td @@ -1022,7 +1022,7 @@ def LDRXro : Load64RO<0b11, 0, 0b01, GPR64, "ldr", def LDRBro : Load8RO<0b00, 1, 0b01, FPR8, "ldr", [(set FPR8:$Rt, (load ro_indexed8:$addr))]>; def LDRHro : Load16RO<0b01, 1, 0b01, FPR16, "ldr", - [(set FPR16:$Rt, (load ro_indexed16:$addr))]>; + [(set (f16 FPR16:$Rt), (load ro_indexed16:$addr))]>; def LDRSro : Load32RO<0b10, 1, 0b01, FPR32, "ldr", [(set (f32 FPR32:$Rt), (load ro_indexed32:$addr))]>; def LDRDro : Load64RO<0b11, 1, 0b01, FPR64, "ldr", @@ -1132,7 +1132,7 @@ def LDRWui : LoadUI<0b10, 0, 0b01, GPR32, am_indexed32, "ldr", def LDRBui : LoadUI<0b00, 1, 0b01, FPR8, am_indexed8, "ldr", [(set FPR8:$Rt, (load am_indexed8:$addr))]>; def LDRHui : LoadUI<0b01, 1, 0b01, FPR16, am_indexed16, "ldr", - [(set FPR16:$Rt, (load am_indexed16:$addr))]>; + [(set (f16 FPR16:$Rt), (load am_indexed16:$addr))]>; def LDRSui : LoadUI<0b10, 1, 0b01, FPR32, am_indexed32, "ldr", [(set (f32 FPR32:$Rt), (load am_indexed32:$addr))]>; def LDRDui : LoadUI<0b11, 1, 0b01, FPR64, am_indexed64, "ldr", @@ -1261,7 +1261,7 @@ def LDURWi : LoadUnscaled<0b10, 0, 0b01, GPR32, am_unscaled32, "ldur", def LDURBi : LoadUnscaled<0b00, 1, 0b01, FPR8, am_unscaled8, "ldur", [(set FPR8:$Rt, (load am_unscaled8:$addr))]>; def LDURHi : LoadUnscaled<0b01, 1, 0b01, FPR16, am_unscaled16, "ldur", - [(set FPR16:$Rt, (load am_unscaled16:$addr))]>; + [(set (f16 FPR16:$Rt), (load am_unscaled16:$addr))]>; def LDURSi : LoadUnscaled<0b10, 1, 0b01, FPR32, am_unscaled32, "ldur", [(set (f32 FPR32:$Rt), (load am_unscaled32:$addr))]>; def LDURDi : LoadUnscaled<0b11, 1, 0b01, FPR64, am_unscaled64, "ldur", @@ -1575,7 +1575,7 @@ def : Pat<(truncstorei32 GPR64:$Rt, ro_indexed32:$addr), def STRBro : Store8RO<0b00, 1, 0b00, FPR8, "str", [(store FPR8:$Rt, ro_indexed8:$addr)]>; def STRHro : Store16RO<0b01, 1, 0b00, FPR16, "str", - [(store FPR16:$Rt, ro_indexed16:$addr)]>; + [(store (f16 FPR16:$Rt), ro_indexed16:$addr)]>; def STRSro : Store32RO<0b10, 1, 0b00, FPR32, "str", [(store (f32 FPR32:$Rt), ro_indexed32:$addr)]>; def STRDro : Store64RO<0b11, 1, 0b00, FPR64, "str", @@ -1623,7 +1623,7 @@ def STRWui : StoreUI<0b10, 0, 0b00, GPR32, am_indexed32, "str", def STRBui : StoreUI<0b00, 1, 0b00, FPR8, am_indexed8, "str", [(store FPR8:$Rt, am_indexed8:$addr)]>; def STRHui : StoreUI<0b01, 1, 0b00, FPR16, am_indexed16, "str", - [(store FPR16:$Rt, am_indexed16:$addr)]>; + [(store (f16 FPR16:$Rt), am_indexed16:$addr)]>; def STRSui : StoreUI<0b10, 1, 0b00, FPR32, am_indexed32, "str", [(store (f32 FPR32:$Rt), am_indexed32:$addr)]>; def STRDui : StoreUI<0b11, 1, 0b00, FPR64, am_indexed64, "str", @@ -1686,7 +1686,7 @@ def STURWi : StoreUnscaled<0b10, 0, 0b00, GPR32, am_unscaled32, "stur", def STURBi : StoreUnscaled<0b00, 1, 0b00, FPR8, am_unscaled8, "stur", [(store FPR8:$Rt, am_unscaled8:$addr)]>; def STURHi : StoreUnscaled<0b01, 1, 0b00, FPR16, am_unscaled16, "stur", - [(store FPR16:$Rt, am_unscaled16:$addr)]>; + [(store (f16 FPR16:$Rt), am_unscaled16:$addr)]>; def STURSi : StoreUnscaled<0b10, 1, 0b00, FPR32, am_unscaled32, "stur", [(store (f32 FPR32:$Rt), am_unscaled32:$addr)]>; def STURDi : StoreUnscaled<0b11, 1, 0b00, FPR64, am_unscaled64, "stur", diff --git a/lib/Target/ARM64/ARM64RegisterInfo.td b/lib/Target/ARM64/ARM64RegisterInfo.td index 83a8f70a8c7..514ba07bb0a 100644 --- a/lib/Target/ARM64/ARM64RegisterInfo.td +++ b/lib/Target/ARM64/ARM64RegisterInfo.td @@ -368,7 +368,7 @@ def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> { let Size = 8; } -def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> { +def FPR16 : RegisterClass<"ARM64", [f16], 16, (sequence "H%u", 0, 31)> { let Size = 16; } def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>; diff --git a/test/CodeGen/AArch64/floatdp_1source.ll b/test/CodeGen/AArch64/floatdp_1source.ll index 3d7f8f0369f..5d11d3f0e21 100644 --- a/test/CodeGen/AArch64/floatdp_1source.ll +++ b/test/CodeGen/AArch64/floatdp_1source.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s @varhalf = global half 0.0 @varfloat = global float 0.0 -- 2.34.1