Fix program crashes due to alignment exceptions generated for SSE memop instructions...

author Sanjay Patel <spatel@rotateright.com>

Tue, 3 Feb 2015 17:13:04 +0000 (17:13 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Tue, 3 Feb 2015 17:13:04 +0000 (17:13 +0000)
author Sanjay Patel <spatel@rotateright.com>
Tue, 3 Feb 2015 17:13:04 +0000 (17:13 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Tue, 3 Feb 2015 17:13:04 +0000 (17:13 +0000)
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td

index ab3319afe93f0a78fdb7686eafad1818fc2d264f..30b3b2876b8a9f86e34119dc4e4267a810076363 100644 (file)
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -132,9 +132,9 @@ def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
  def FeatureXOP     : SubtargetFeature<"xop", "HasXOP", "true",
                                        "Enable XOP instructions",
                                        [FeatureFMA4]>;
-def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
-                                          "HasVectorUAMem", "true",
-                 "Allow unaligned memory operands on vector/SIMD instructions">;
+def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem",
+                                          "HasSSEUnalignedMem", "true",
+                      "Allow unaligned memory operands with SSE instructions">;
  def FeatureAES     : SubtargetFeature<"aes", "HasAES", "true",
                                        "Enable AES instructions",
                                        [FeatureSSE2]>;
@@ -309,7 +309,6 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
                                         FeatureCMPXCHG16B,
                                         FeatureFastUAMem,
                                         FeatureSlowUAMem32,
-                                       FeatureVectorUAMem,
                                         FeaturePOPCNT,
                                         FeatureAES,
                                         FeaturePCLMUL
@@ -322,7 +321,6 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
                                       FeatureCMPXCHG16B,
                                       FeatureFastUAMem,
                                       FeatureSlowUAMem32,
-                                     FeatureVectorUAMem,
                                       FeaturePOPCNT,
                                       FeatureAES,
                                       FeaturePCLMUL,
@@ -337,7 +335,6 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
                                     FeatureAVX2,
                                     FeatureCMPXCHG16B,
                                     FeatureFastUAMem,
-                                   FeatureVectorUAMem,
                                     FeaturePOPCNT,
                                     FeatureAES,
                                     FeaturePCLMUL,
@@ -360,7 +357,6 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
                                       FeatureAVX2,
                                       FeatureCMPXCHG16B,
                                       FeatureFastUAMem,
-                                     FeatureVectorUAMem,
                                       FeaturePOPCNT,
                                       FeatureAES,
                                       FeaturePCLMUL,
@@ -388,7 +384,7 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel,
                        FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                        FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                        FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                      FeatureSlowIncDec, FeatureVectorUAMem]>;
+                      FeatureSlowIncDec]>;
  def : KnightsLandingProc<"knl">;
  
  // FIXME: define SKX model
@@ -399,7 +395,7 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel,
                        FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
                        FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                        FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
-                      FeatureSlowIncDec, FeatureSGX, FeatureVectorUAMem]>;
+                      FeatureSlowIncDec, FeatureSGX]>;
  def : SkylakeProc<"skylake">;
  def : SkylakeProc<"skx">; // Legacy alias.
  
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td

index 85768aeb4e47d8a4ac50788e3861b529d21e593f..e5de404ed218d7fa64c7c94361064b065b694614 100644 (file)
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -433,7 +433,7 @@ def alignedloadv8i64  : PatFrag<(ops node:$ptr),
  // setting a feature bit in the processor (on startup, for example).
  // Opteron 10h and later implement such a feature.
  def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return    Subtarget->hasVectorUAMem()
+  return    Subtarget->hasSSEUnalignedMem()
           || cast<LoadSDNode>(N)->getAlignment() >= 16;
  }]>;
  
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp

index 01889e887c2a3052f13880fd02787f5d35ce2b19..e90da0f98a65ece393cfc22735966646c21a5e8d 100644 (file)
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -265,7 +265,7 @@ void X86Subtarget::initializeEnvironment() {
    IsSHLDSlow = false;
    IsUAMemFast = false;
    IsUAMem32Slow = false;
-  HasVectorUAMem = false;
+  HasSSEUnalignedMem = false;
    HasCmpxchg16b = false;
    UseLeaForSP = false;
    HasSlowDivide32 = false;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h

index 27dec6596435daf6baebbae5639bf29fc9bdf003..417f1332c546ca0171c04bf933b6ce08a4652867 100644 (file)
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -162,9 +162,9 @@ protected:
    /// True if unaligned 32-byte memory accesses are slow.
    bool IsUAMem32Slow;
  
-  /// HasVectorUAMem - True if SIMD operations can have unaligned memory
-  /// operands. This may require setting a feature bit in the processor.
-  bool HasVectorUAMem;
+  /// True if SSE operations can have unaligned memory operands.
+  /// This may require setting a configuration bit in the processor.
+  bool HasSSEUnalignedMem;
  
    /// HasCmpxchg16b - True if this processor has the CMPXCHG16B instruction;
    /// this is true for most x86-64 chips, but not the first AMD chips.
@@ -375,7 +375,7 @@ public:
    bool isSHLDSlow() const { return IsSHLDSlow; }
    bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
    bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
-  bool hasVectorUAMem() const { return HasVectorUAMem; }
+  bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
    bool hasCmpxchg16b() const { return HasCmpxchg16b; }
    bool useLeaForSP() const { return UseLeaForSP; }
    bool hasSlowDivide32() const { return HasSlowDivide32; }
diff --git a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll b/test/CodeGen/X86/2010-01-07-UAMemFeature.ll

deleted file mode 100644 (file)

index bb24adb..0000000
--- a/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc -mcpu=yonah -mattr=vector-unaligned-mem -march=x86 < %s | FileCheck %s
-; CHECK: addps (
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
-       %A = load <4 x float>* %P, align 4
-       %B = fadd <4 x float> %A, %In
-       ret <4 x float> %B
-}
diff --git a/test/CodeGen/X86/fold-vex.ll b/test/CodeGen/X86/fold-vex.ll

index a0c5e22b1c06f7feb51f646ff74158cdd3791da3..5a8b1d8cbfdf6f4d6c5d7c1e56f105efbd5aa8a8 100644 (file)
--- a/test/CodeGen/X86/fold-vex.ll
+++ b/test/CodeGen/X86/fold-vex.ll
@@ -1,12 +1,18 @@
  ; Use CPU parameters to ensure that a CPU-specific attribute is not overriding the AVX definition.
  
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown                  -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx             | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2                 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown                  -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx -mattr=-avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2     -mattr=-avx | FileCheck %s --check-prefix=SSE
  
  ; No need to load unaligned operand from memory using an explicit instruction with AVX.
  ; The operand should be folded into the AND instr.
  
+; With SSE, folding memory operands into math/logic ops requires 16-byte alignment
+; unless specially configured on some CPUs such as AMD Family 10H.
+
  define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
    %in0 = load <4 x i32>* %p0, align 2
    %a = and <4 x i32> %in0, %in1
@@ -16,5 +22,10 @@ define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
  ; CHECK-NOT:   vmovups
  ; CHECK:       vandps (%rdi), %xmm0, %xmm0
  ; CHECK-NEXT:  ret
+
+; SSE-LABEL: @test1
+; SSE:       movups (%rdi), %xmm1
+; SSE-NEXT:  andps %xmm1, %xmm0
+; SSE-NEXT:  ret
  }
  
diff --git a/test/CodeGen/X86/sse-unaligned-mem-feature.ll b/test/CodeGen/X86/sse-unaligned-mem-feature.ll

new file mode 100644 (file)

index 0000000..15f91ee
--- /dev/null
+++ b/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mcpu=yonah -mattr=sse-unaligned-mem -march=x86 < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
+       %A = load <4 x float>* %P, align 4
+       %B = fadd <4 x float> %A, %In
+       ret <4 x float> %B
+
+; CHECK-LABEL: @foo
+; CHECK:       addps (
+}
author	Sanjay Patel <spatel@rotateright.com>
	Tue, 3 Feb 2015 17:13:04 +0000 (17:13 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Tue, 3 Feb 2015 17:13:04 +0000 (17:13 +0000)
lib/Target/X86/X86.td		patch | blob | history
lib/Target/X86/X86InstrFragmentsSIMD.td		patch | blob | history
lib/Target/X86/X86Subtarget.cpp		patch | blob | history
lib/Target/X86/X86Subtarget.h		patch | blob | history
test/CodeGen/X86/2010-01-07-UAMemFeature.ll	[deleted file]	patch | blob | history
test/CodeGen/X86/fold-vex.ll		patch | blob | history
test/CodeGen/X86/sse-unaligned-mem-feature.ll	[new file with mode: 0644]	patch | blob