[SDAG] Introduce a combined set to the DAG combiner which tracks nodes which have been combined (at least once)

author Chandler Carruth <chandlerc@gmail.com>

Thu, 24 Jul 2014 22:15:28 +0000 (22:15 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Thu, 24 Jul 2014 22:15:28 +0000 (22:15 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Thu, 24 Jul 2014 22:15:28 +0000 (22:15 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Thu, 24 Jul 2014 22:15:28 +0000 (22:15 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index b526523063bff819c3896fa9f90336c0b4a8f13d..379ea7f7f09d462d3339e629c6e8b942a7658878 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -104,6 +104,12 @@ namespace {
      /// stable indices of nodes within the worklist.
      DenseMap<SDNode *, unsigned> WorklistMap;
  
+    /// \brief Set of nodes which have been combined (at least once).
+    ///
+    /// This is used to allow us to reliably add any operands of a DAG node
+    /// which have not yet been combined to the worklist.
+    SmallPtrSet<SDNode *, 64> CombinedNodes;
+
      // AA - Used for DAG load/store alias analysis.
      AliasAnalysis &AA;
  
@@ -136,6 +142,8 @@ namespace {
      /// removeFromWorklist - remove all instances of N from the worklist.
      ///
      void removeFromWorklist(SDNode *N) {
+      CombinedNodes.erase(N);
+
        auto It = WorklistMap.find(N);
        if (It == WorklistMap.end())
          return; // Not in the worklist.
@@ -1152,6 +1160,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
      if (recursivelyDeleteUnusedNodes(N))
        continue;
  
+    DEBUG(dbgs() << "\nCombining: ";
+          N->dump(&DAG));
+
+    // Add any operands of the new node which have not yet been combined to the
+    // worklist as well. Because the worklist uniques things already, this
+    // won't repeatedly process the same operand.
+    CombinedNodes.insert(N);
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (!CombinedNodes.count(N->getOperand(i).getNode()))
+        AddToWorklist(N->getOperand(i).getNode());
+
      WorklistRemover DeadNodes(*this);
  
      SDValue RV = combine(N);
@@ -1172,11 +1191,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
             RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
             "Node was deleted but visit returned new node!");
  
-    DEBUG(dbgs() << "\nReplacing.3 ";
-          N->dump(&DAG);
-          dbgs() << "\nWith: ";
-          RV.getNode()->dump(&DAG);
-          dbgs() << '\n');
+    DEBUG(dbgs() << " ... into: ";
+          RV.getNode()->dump(&DAG));
  
      // Transfer debug value.
      DAG.TransferDbgValues(SDValue(N, 0), RV);
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll

deleted file mode 100644 (file)

index ce132c6..0000000
--- a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: llc -O3 < %s | FileCheck %s
-; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s
-; Test case for a DAG combiner bug where we combined an indexed load
-; with an extension (sext, zext, or any) into a regular extended load,
-; i.e., dropping the indexed value.
-; <rdar://problem/16389332>
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios"
-
-%class.A = type { i64, i64 }
-%class.C = type { i64 }
-
-; CHECK-LABEL: XX:
-; CHECK: ldr
-define i32 @XX(%class.A* %K, i1 %tst, i32* %addr, %class.C** %ppC, %class.C* %pC) {
-entry:
-  br i1 %tst, label %if.then, label %lor.rhs.i
-
-lor.rhs.i:                                        ; preds = %entry
-  %tmp = load i32* %addr, align 4
-  %y.i.i.i = getelementptr inbounds %class.A* %K, i64 0, i32 1
-  %tmp1 = load i64* %y.i.i.i, align 8
-  %U.sroa.3.8.extract.trunc.i = trunc i64 %tmp1 to i32
-  %div11.i = sdiv i32 %U.sroa.3.8.extract.trunc.i, 17
-  %add12.i = add nsw i32 0, %div11.i
-  %U.sroa.3.12.extract.shift.i = lshr i64 %tmp1, 32
-  %U.sroa.3.12.extract.trunc.i = trunc i64 %U.sroa.3.12.extract.shift.i to i32
-  %div15.i = sdiv i32 %U.sroa.3.12.extract.trunc.i, 13
-  %add16.i = add nsw i32 %add12.i, %div15.i
-  %rem.i.i = srem i32 %add16.i, %tmp
-  %idxprom = sext i32 %rem.i.i to i64
-  %arrayidx = getelementptr inbounds %class.C** %ppC, i64 %idxprom
-  %tobool533 = icmp eq %class.C* %pC, null
-  br i1 %tobool533, label %while.end, label %while.body
-
-if.then:                                          ; preds = %entry
-  ret i32 42
-
-while.body:                                       ; preds = %lor.rhs.i
-  ret i32 5
-
-while.end:                                        ; preds = %lor.rhs.i
-  %tmp3 = load %class.C** %arrayidx, align 8
-  ret i32 50
-}
diff --git a/test/CodeGen/ARM/aapcs-hfa-code.ll b/test/CodeGen/ARM/aapcs-hfa-code.ll

index 396e83816ccf9900556801796be9aa5d32f06fde..41ea6137b3dd86388529747f7419aeb1022be197 100644 (file)
--- a/test/CodeGen/ARM/aapcs-hfa-code.ll
+++ b/test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -92,12 +92,10 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
    call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0)
  
  ; CHECK-LABEL: test_1double_misaligned:
-; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
-; CHECK-DAG: mov r[[BASE:[0-9]+]], sp
  ; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
+; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
  ; CHECK-DAG: movt [[ONEHI]], #16368
-; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]!
-; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4]
+; CHECK-DAG: strd [[ONELO]], [[ONEHI]], [sp, #8]
  
  ; CHECK-M4F-LABEL: test_1double_misaligned:
  ; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll

index 0c13fb1adfbe9a79bc387d70b2d5ad33d24a0258..48558a38e3add2a86999618e3cc760c9e89df7ec 100644 (file)
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -757,24 +757,9 @@ define i32 @slti6(i32 %a) nounwind readnone {
  
  ; ALL-LABEL: slti6:
  
-; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 32-CMOV-DAG: xori [[R1]], [[R1]], 1
-; 32-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 32-CMOV-NOT: movn
-
-; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
-; 32-CMP-DAG:  xori [[R1]], [[R1]], 1
-; 32-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 32-CMP-NOT:  seleqz
-; 32-CMP-NOT:  selnez
-
-; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 64-CMOV-DAG: xori [[R1]], [[R1]], 1
-; 64-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 64-CMOV-NOT: movn
-
-; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
-; 64-CMP-DAG:  xori [[R1]], [[R1]], 1
-; 64-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 64-CMP-NOT:  seleqz
-; 64-CMP-NOT:  selnez
+; ALL-DAG: addiu [[R1:\$[0-9]+]], $zero, 6
+; ALL-DAG: slt [[R1]], [[R1]], $4
+; ALL-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
+; ALL-NOT: movn
+; ALL-NOT:  seleqz
+; ALL-NOT:  selnez
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll

index f733d90404218e1f40d1065d72c397a5c39fc450..dac4f173e3163bd944fa274391365f1473a35f58 100644 (file)
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -70,8 +70,8 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
  }
  
  ; SI-LABEL: @trunc_i64_add_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
+; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
  ; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
  ; SI-NOT: ADDC
  ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll

index 3c3b475d077cbcfdf25d19f57faf94ebea8406fe..a2b7e47f4d4b28caf9eb18cb80bd294e4354ec74 100644 (file)
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -116,10 +116,10 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
  }
  
  ; SI-LABEL: @trunc_i64_or_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
-; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: S_LOAD_DWORD s[[SREG0:[0-9]+]]
+; SI: S_LOAD_DWORD s[[SREG1:[0-9]+]]
+; SI: S_OR_B32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
  ; SI: BUFFER_STORE_DWORD [[VRESULT]],
  define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
    %add = or i64 %b, %a
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll

index 5372bc522785840f8bf9d59db755fd19061b4ec4..60025bfcdc81dbbe5a55d3cc58525ae319f7b0ef 100644 (file)
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -7,7 +7,7 @@ entry:
          %tmp1 = bitcast double %a to <8 x i8>
          %tmp2 = bitcast double %b to <8 x i8>
          %tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK:  paddw
+; CHECK:  paddb
          store <8 x i8> %tmp3, <8 x i8>* null
          ret void
  }
@@ -18,7 +18,7 @@ entry:
          %tmp1 = bitcast double %a to <4 x i16>
          %tmp2 = bitcast double %b to <4 x i16>
          %tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK:  paddd
+; CHECK:  paddw
          store <4 x i16> %tmp3, <4 x i16>* null
          ret void
  }
@@ -29,7 +29,7 @@ entry:
          %tmp1 = bitcast double %a to <2 x i32>
          %tmp2 = bitcast double %b to <2 x i32>
          %tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK:  paddq
+; CHECK:  paddd
          store <2 x i32> %tmp3, <2 x i32>* null
          ret void
  }
diff --git a/test/CodeGen/X86/i8-umulo.ll b/test/CodeGen/X86/i8-umulo.ll

index ba846f3e9be309dee57da0e23b2fcc205ab841aa..1d70f4a87540f4fe88c90c6cf6a798d5bc8a0845 100644 (file)
--- a/test/CodeGen/X86/i8-umulo.ll
+++ b/test/CodeGen/X86/i8-umulo.ll
@@ -3,7 +3,7 @@
  
  declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
  define i8 @testumulo(i32 %argc) {
-; CHECK: imulw
+; CHECK: imull
  ; CHECK: testb %{{.+}}, %{{.+}}
  ; CHECK: je [[NOOVERFLOWLABEL:.+]]
  ; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll

index d4174539f2f95c52a7f76484648545b09c571add..dfa8aed4646390ee81b5922561ce0b7b4f2673c1 100644 (file)
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -284,7 +284,7 @@ entry:
  define i32 @func_test1(i32 %p1) nounwind uwtable {
  entry:
  ; CHECK-LABEL: func_test1:
-; CHECK: testb
+; CHECK: andb
  ; CHECK: j
  ; CHECK: ret
    %0 = load i32* @b, align 4
diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll

index f47161e5520ca75f9f9a553403d8430715f4dd63..edb8433ec30ca831ea63e5fd1988c275ce726c55 100644 (file)
--- a/test/CodeGen/X86/lower-bitcast.ll
+++ b/test/CodeGen/X86/lower-bitcast.ll
@@ -68,13 +68,13 @@ define i64 @test4(i64 %A) {
    %2 = bitcast <2 x i32> %add to i64
    ret i64 %2
  }
-; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
+; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
  ; Ideally, we should fold that sequence into a single paddd. This is fixed with
  ; the widening legalization.
  ;
  ; CHECK-LABEL: test4
  ; CHECK: pshufd
-; CHECK-NEXT: paddq
+; CHECK-NEXT: paddd
  ; CHECK-NEXT: pshufd
  ; CHECK: ret
  ;
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll

index c8aaf327a7ddf8c35b10b9c5f31a049a80687c82..b4dc5fd47168ec22173099e14c8cc9cadfe2069a 100644 (file)
--- a/test/CodeGen/X86/pr15267.ll
+++ b/test/CodeGen/X86/pr15267.ll
@@ -48,19 +48,22 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
  
  ; CHECK: test3
  ; CHECK: movzbl
-; CHECK: shrl
-; CHECK: andl $1
-; CHECK: andl $1
-; CHECK: vmovd
-; CHECK: pinsrd $1
-; CHECK: shrl $2
-; CHECK: andl $1
-; CHECK: pinsrd $2
-; CHECK: shrl $3
-; CHECK: andl $1
-; CHECK: pinsrd $3
-; CHECK: pslld
-; CHECK: psrad
-; CHECK: pmovsxdq
-; CHECK: pmovsxdq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: vpunpcklqdq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: vpunpcklqdq
+; CHECK: vinsertf128
  ; CHECK: ret
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll

index 51f6fb0dbbe08480e6eab660f3aed90dd344432a..e3cc2fa668efe07afb48f33baaccac55207895d9 100644 (file)
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -34,7 +34,7 @@ entry:
  ; X64: movb    %sil, 1(%rdi)
  
  ; X32-LABEL: test2:
-; X32: movzbl  8(%esp), %e[[REG:[abcd]]]x
+; X32: movb    8(%esp), %[[REG:[abcd]]]l
  ; X32: movb    %[[REG]]l, 1(%{{.*}})
  }
  
@@ -67,8 +67,8 @@ entry:
  ; X64: movw    %si, 2(%rdi)
  
  ; X32-LABEL: test4:
-; X32: movl    8(%esp), %e[[REG:[abcd]x]]
-; X32: movw    %[[REG]], 2(%{{.*}})
+; X32: movw    8(%esp), %[[REG:[abcd]]]x
+; X32: movw    %[[REG]]x, 2(%{{.*}})
  }
  
  define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -84,8 +84,8 @@ entry:
  ; X64: movw    %si, 2(%rdi)
  
  ; X32-LABEL: test5:
-; X32: movzwl  8(%esp), %e[[REG:[abcd]x]]
-; X32: movw    %[[REG]], 2(%{{.*}})
+; X32: movw    8(%esp), %[[REG:[abcd]]]x
+; X32: movw    %[[REG]]x, 2(%{{.*}})
  }
  
  define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll

index d230f1f7e2c688a0e5b770e1ea42fcb2b4fe3bfe..b981871d94b0d8486917c5a6f1b26c5fa98a9a2b 100644 (file)
--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -32,7 +32,7 @@ define void @load_2_i16(<2 x i16>* %A)  {
  
  ;CHECK-LABEL: load_2_i32:
  ;CHECK: pmovzxdq
-;CHECK: paddq
+;CHECK: paddd
  ;CHECK: pshufd
  ;CHECK: ret
  define void @load_2_i32(<2 x i32>* %A)  {
@@ -56,7 +56,7 @@ define void @load_4_i8(<4 x i8>* %A)  {
  
  ;CHECK-LABEL: load_4_i16:
  ;CHECK: pmovzxwd
-;CHECK: paddd
+;CHECK: paddw
  ;CHECK: pshufb
  ;CHECK: ret
  define void @load_4_i16(<4 x i16>* %A)  {
@@ -68,7 +68,7 @@ define void @load_4_i16(<4 x i16>* %A)  {
  
  ;CHECK-LABEL: load_8_i8:
  ;CHECK: pmovzxbw
-;CHECK: paddw
+;CHECK: paddb
  ;CHECK: pshufb
  ;CHECK: ret
  define void @load_8_i8(<8 x i8>* %A)  {
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll

index a3229073751b567d49d800cbb215949845047b1b..ec1ce3da5e14c92d5df26590d5b0a37ef0c77171 100644 (file)
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -122,7 +122,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
  ; SSE41-LABEL: test8:
  ; SSE41: pmuldq
  ; SSE41: pshufd        $49
-; SSE41-NOT: pshufd    $49
+; SSE41: pshufd        $49
  ; SSE41: pmuldq
  ; SSE41: shufps        $-35
  ; SSE41: pshufd        $-40
@@ -134,7 +134,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
  ; SSE-LABEL: test8:
  ; SSE: pmuludq
  ; SSE: pshufd  $49
-; SSE-NOT: pshufd      $49
+; SSE: pshufd  $49
  ; SSE: pmuludq
  ; SSE: shufps  $-35
  ; SSE: pshufd  $-40
@@ -147,7 +147,7 @@ define <4 x i32> @test8(<4 x i32> %a) {
  ; AVX-LABEL: test8:
  ; AVX: vpmuldq
  ; AVX: vpshufd $49
-; AVX-NOT: vpshufd     $49
+; AVX: vpshufd $49
  ; AVX: vpmuldq
  ; AVX: vshufps $-35
  ; AVX: vpshufd $-40
@@ -162,10 +162,12 @@ define <8 x i32> @test9(<8 x i32> %a) {
    ret <8 x i32> %div
  
  ; AVX-LABEL: test9:
-; AVX: vpalignr $4
  ; AVX: vpbroadcastd
+; AVX: vpalignr $4
+; AVX: vpalignr $4
  ; AVX: vpmuldq
  ; AVX: vpmuldq
+; AVX: vpalignr $4
  ; AVX: vpblendd $170
  ; AVX: vpadd
  ; AVX: vpsrld $31
@@ -195,10 +197,12 @@ define <8 x i32> @test11(<8 x i32> %a) {
    ret <8 x i32> %rem
  
  ; AVX-LABEL: test11:
-; AVX: vpalignr $4
  ; AVX: vpbroadcastd
+; AVX: vpalignr $4
+; AVX: vpalignr $4
  ; AVX: vpmuldq
  ; AVX: vpmuldq
+; AVX: vpalignr $4
  ; AVX: vpblendd $170
  ; AVX: vpadd
  ; AVX: vpsrld $31
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll

index d115929f5aab784d4e74402952dac2e9c362db82..e0b861f29de8208b9d2eaf45dc452dedbdb8311a 100644 (file)
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -2,12 +2,12 @@
  ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
  
  ; CHECK: movl
-; CHECK: paddd
+; CHECK: paddw
  ; CHECK: movlpd
  
  ; Scheduler causes produce a different instruction order
  ; ATOM: movl
-; ATOM: paddd
+; ATOM: paddw
  ; ATOM: movlpd
  
  ; bitcast a v4i16 to v2i32
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll

index 9f6778cff5927f7cb1e4dbacefc5f64eee828e3f..3f54ab694c07124878e6c8cab7d10ef0ad87e48a 100644 (file)
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: paddq
+; CHECK: paddd
  
  ; truncate v2i64 to v2i32
  
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll

index 9d298adcc81db5afa7a5ccac5ef7f50e13c7e84d..0ec3574d69eb7cf9f7a6df47e7f67b9e9a9c6097 100644 (file)
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -91,10 +91,9 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
  %i16vec4 = type <4 x i16>
  define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
  ; CHECK-LABEL: add4i16:
-; CHECK:         pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
-; CHECK-NEXT:    pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
-; CHECK-NEXT:    paddd   %[[R0]], %[[R1]]
-; CHECK-NEXT:    pshufb  {{.*}}, %[[R1]]
+; CHECK:         movq    (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    movq    (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    paddw   %[[R0]], %[[R1]]
  ; CHECK-NEXT:    movq    %[[R1]], (%{{.*}})
         %a = load %i16vec4* %ap, align 16
         %b = load %i16vec4* %bp, align 16
diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll

index 641786f5a9149d4e276d20bf877a6dd55fe673f3..2879fb4e1e7459ce2b075bcde8b7ba7354d89dd7 100644 (file)
--- a/test/CodeGen/X86/x86-64-tls-1.ll
+++ b/test/CodeGen/X86/x86-64-tls-1.ll
@@ -1,10 +1,9 @@
  ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
  @tm_nest_level = internal thread_local global i32 0
  define i64 @z() nounwind {
-; FIXME: The codegen here is primitive at best and could be much better.
-; The add and the moves can be folded together.
-; CHECK-DAG: movq    $tm_nest_level@TPOFF, %rcx
-; CHECK-DAG: movq    %fs:0, %rax
-; CHECK: addl    %ecx, %eax
+; CHECK:      movq    $tm_nest_level@TPOFF, %r[[R0:[abcd]]]x
+; CHECK-NEXT: addl    %fs:0, %e[[R0]]x
+; CHECK-NEXT: andq    $100, %r[[R0]]x
+
    ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
  }
diff --git a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll

index 4fe6c664df53bb7b6412a316a20476c131923ecf..4317d8ab6a26b93e10a951cc927266c2062fe3de 100644 (file)
--- a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -2,10 +2,10 @@
  
  define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
  ; CHECK-LABEL: LCPI0_0:
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
  ; CHECK-LABEL: foo:
  ; CHECK: cmpeqps %xmm1, %xmm0
  ; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
@@ -59,10 +59,10 @@ define void @foo2(<4 x float>* noalias %result) nounwind {
  ; scalar value like what the zext creates.
  define <4 x float> @foo3(<4 x float> %val, <4 x float> %test) nounwind {
  ; CHECK-LABEL: LCPI3_0:
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 0                       ## float 0.000000e+00
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 0                       ## float 0.000000e+00
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 0                       ## 0x0
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 0                       ## 0x0
  ; CHECK-LABEL: foo3:
  ; CHECK: cmpeqps %xmm1, %xmm0
  ; CHECK-NEXT: andps LCPI3_0(%rip), %xmm0
author	Chandler Carruth <chandlerc@gmail.com>
	Thu, 24 Jul 2014 22:15:28 +0000 (22:15 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Thu, 24 Jul 2014 22:15:28 +0000 (22:15 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll	[deleted file]	patch \| blob \| history
test/CodeGen/ARM/aapcs-hfa-code.ll		patch \| blob \| history
test/CodeGen/Mips/cmov.ll		patch \| blob \| history
test/CodeGen/R600/add_i64.ll		patch \| blob \| history
test/CodeGen/R600/or.ll		patch \| blob \| history
test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll		patch \| blob \| history
test/CodeGen/X86/i8-umulo.ll		patch \| blob \| history
test/CodeGen/X86/jump_sign.ll		patch \| blob \| history
test/CodeGen/X86/lower-bitcast.ll		patch \| blob \| history
test/CodeGen/X86/pr15267.ll		patch \| blob \| history
test/CodeGen/X86/store-narrow.ll		patch \| blob \| history
test/CodeGen/X86/trunc-ext-ld-st.ll		patch \| blob \| history
test/CodeGen/X86/vector-idiv.ll		patch \| blob \| history
test/CodeGen/X86/widen_cast-1.ll		patch \| blob \| history
test/CodeGen/X86/widen_conv-1.ll		patch \| blob \| history
test/CodeGen/X86/widen_load-2.ll		patch \| blob \| history
test/CodeGen/X86/x86-64-tls-1.ll		patch \| blob \| history
test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll		patch \| blob \| history