ADDS{D|S}rr_Int and MULS{D|S}rr_Int are not commutable. The users of these intrinsics rely on the top elements of the vector being left unmodified, so the two operands cannot be swapped.

author Evan Cheng <evan.cheng@apple.com>

Thu, 26 Feb 2009 03:12:02 +0000 (03:12 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Thu, 26 Feb 2009 03:12:02 +0000 (03:12 +0000)
author Evan Cheng <evan.cheng@apple.com>
Thu, 26 Feb 2009 03:12:02 +0000 (03:12 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Thu, 26 Feb 2009 03:12:02 +0000 (03:12 +0000)
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td

index b435d8e68996db98f5be93127a6d8ddd24652ba9..37ba59c92186d22439ef3628ee66c9d6bb2b4be3 100644 (file)
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -19,13 +19,13 @@
  let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
    def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">,
                Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_v4f32_ty], [IntrNoMem, Commutative]>;
+                         llvm_v4f32_ty], [IntrNoMem]>;
    def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">,
                Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                           llvm_v4f32_ty], [IntrNoMem]>;
    def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">,
                Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
-                         llvm_v4f32_ty], [IntrNoMem, Commutative]>;
+                         llvm_v4f32_ty], [IntrNoMem]>;
    def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">,
                Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
                           llvm_v4f32_ty], [IntrNoMem]>;
@@ -176,13 +176,13 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
    def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">,
                Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem, Commutative]>;
+                         llvm_v2f64_ty], [IntrNoMem]>;
    def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">,
                Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                           llvm_v2f64_ty], [IntrNoMem]>;
    def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">,
                Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem, Commutative]>;
+                         llvm_v2f64_ty], [IntrNoMem]>;
    def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">,
                Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
                           llvm_v2f64_ty], [IntrNoMem]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td

index 407b4f1b063ddb5a4298016d55a1b643beba2ea4..3e00c3b4a0d864dc2f43d0cf3df0b4a55d6775d9 100644 (file)
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -526,7 +526,7 @@ def FsANDNPSrm : PSI<0x55, MRMSrcMem,
  /// In addition, we also have a special variant of the scalar form here to
  /// represent the associated intrinsic operation.  This form is unlike the
  /// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements undefined.
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
  ///
  /// These three forms can each be reg+reg or reg+mem, so there are a total of
  /// six "instructions".
@@ -566,9 +566,7 @@ multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
    def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                       !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
-    let isCommutable = Commutable;
-  }
+                     [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>;
  
    // Intrinsic operation, reg+mem.
    def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
@@ -1275,7 +1273,7 @@ def FsANDNPDrm : PDI<0x55, MRMSrcMem,
  /// In addition, we also have a special variant of the scalar form here to
  /// represent the associated intrinsic operation.  This form is unlike the
  /// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements undefined.
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
  ///
  /// These three forms can each be reg+reg or reg+mem, so there are a total of
  /// six "instructions".
@@ -1315,9 +1313,7 @@ multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
    def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
                                       (ins VR128:$src1, VR128:$src2),
                       !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
-                     [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
-    let isCommutable = Commutable;
-  }
+                     [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
  
    // Intrinsic operation, reg+mem.
    def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
diff --git a/test/CodeGen/X86/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll

new file mode 100644 (file)

index 0000000..b772bf8
--- /dev/null
+++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& not grep commuted
+; rdar://6608609
+
+define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
+entry:
+       %tmp.i2 = bitcast <2 x double> %B to <2 x i64>          ; <<2 x i64>> [#uses=1]
+       %tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458>              ; <<2 x i64>> [#uses=1]
+       %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double>             ; <<2 x double>> [#uses=1]
+       %0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone              ; <<2 x double>> [#uses=1]
+       %tmp.i = add <2 x double> %0, %C                ; <<2 x double>> [#uses=1]
+       ret <2 x double> %tmp.i
+}
+
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
author	Evan Cheng <evan.cheng@apple.com>
	Thu, 26 Feb 2009 03:12:02 +0000 (03:12 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Thu, 26 Feb 2009 03:12:02 +0000 (03:12 +0000)
include/llvm/IntrinsicsX86.td		patch | blob | history
lib/Target/X86/X86InstrSSE.td		patch | blob | history
test/CodeGen/X86/2009-02-25-CommuteBug.ll	[new file with mode: 0644]	patch | blob