From 4b9d868cc774eba681a93a61f056105b7dfd9c8f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 19 Dec 2015 01:39:48 +0000 Subject: [PATCH] Fix broken type legalization of min/max This was using an anyext when promoting the type when zext/sext is required. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@256074 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 21 +++++++++- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + test/CodeGen/AMDGPU/max.ll | 38 +++++++++++++++++++ test/CodeGen/AMDGPU/min.ll | 32 ++++++++++++++++ 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 63c9cc52871..3131ca10145 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -76,9 +76,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: Res = PromoteIntRes_SExtOrZExtIntBinOp(N, true); break; case ISD::UMIN: - case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::UMAX: Res = PromoteIntRes_SExtOrZExtIntBinOp(N, false); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -660,6 +661,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_SExtOrZExtIntBinOp(SDNode *N, + bool Signed) { + SDValue LHS, RHS; + + if (Signed) { + LHS = SExtPromotedInteger(N->getOperand(0)); + RHS = SExtPromotedInteger(N->getOperand(1)); + } else { + LHS = ZExtPromotedInteger(N->getOperand(0)); + RHS = ZExtPromotedInteger(N->getOperand(1)); + } + + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 267a1145a0a..e121e3bc6fa 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -276,6 +276,7 @@ private: SDValue PromoteIntRes_SETCC(SDNode *N); SDValue PromoteIntRes_SHL(SDNode *N); SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtOrZExtIntBinOp(SDNode *N, bool Signed); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); diff --git a/test/CodeGen/AMDGPU/max.ll b/test/CodeGen/AMDGPU/max.ll index 3f8662d507e..eeb915c10a9 100644 --- a/test/CodeGen/AMDGPU/max.ll +++ b/test/CodeGen/AMDGPU/max.ll @@ -53,6 +53,23 @@ define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { ret void } +; FUNC-LABEL: {{^}}v_test_imax_sge_i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: v_max_i32_e32 +define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp sge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32: ; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -132,6 +149,23 @@ define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, < ret void } +; FUNC-LABEL: {{^}}v_test_umax_uge_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_max_u32_e32 +define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp uge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @v_test_umax_ugt_i32 ; SI: v_max_u32_e32 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -203,6 +237,10 @@ define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 } ; FUNC-LABEL: {{^}}s_test_imax_sge_i16: +; SI: s_load_dword +; SI: s_load_dword +; SI: s_sext_i32_i16 +; SI: s_sext_i32_i16 ; SI: s_max_i32 define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { %cmp = icmp sge i16 %a, %b diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll index fbc3d7dcea2..215dbeb4b2f 100644 --- a/test/CodeGen/AMDGPU/min.ll +++ b/test/CodeGen/AMDGPU/min.ll @@ -48,6 +48,10 @@ define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, < } ; FUNC-LABEL: {{^}}s_test_imin_sle_i8: +; SI: s_load_dword +; SI: s_load_dword +; SI: s_sext_i32_i8 +; SI: s_sext_i32_i8 ; SI: s_min_i32 define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind { %cmp = icmp sle i8 %a, %b @@ -60,10 +64,21 @@ define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind { ; extloads with mubuf instructions. ; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte + ; SI: v_min_i32 ; SI: v_min_i32 ; SI: v_min_i32 ; SI: v_min_i32 + +; SI: s_endpgm define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind { %cmp = icmp sle <4 x i8> %a, %b %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b @@ -192,6 +207,23 @@ define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } +; FUNC-LABEL: {{^}}v_test_umin_ult_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_min_u32_e32 +define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp ult i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @s_test_umin_ult_i32 ; SI: s_min_u32 define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { -- 2.34.1