From: Che-Liang Chiou Date: Wed, 20 Apr 2011 09:28:20 +0000 (+0000) Subject: ptx: add floating-point comparison to setp X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=1e93249199d25d3a68bdc51d7c0dd682e2e894be;p=oota-llvm.git ptx: add floating-point comparison to setp Patched by Dan Bailey git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129847 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index c075512e8ab..25e7ae1b6ed 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -323,9 +323,9 @@ multiclass INT3ntnc { [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>; } -multiclass PTX_SETP { - // TODO 1. support floating-point 2. support 5-operand format: p|q, a, b, c + // TODO support 5-operand format: p|q, a, b, c def rr : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), @@ -387,6 +387,74 @@ multiclass PTX_SETP; } +multiclass PTX_SETP_FP { + // TODO support 5-operand format: p|q, a, b, c + + def rr_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>; + def rr_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>; + + def rr_and_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_and_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_or_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_or_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_xor_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_xor_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_and_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_and_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_or_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_or_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_xor_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_xor_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; +} + multiclass PTX_LD { def rr32 : InstPTX<(outs RC:$d), (ins MEMri32:$a), @@ -557,18 +625,50 @@ def FCOS64 : InstPTX<(outs RRegf64:$d), ///===- Comparison and Selection Instructions -----------------------------===// -defm SETPEQu32 : PTX_SETP; -defm SETPNEu32 : PTX_SETP; -defm SETPLTu32 : PTX_SETP; -defm SETPLEu32 : PTX_SETP; -defm SETPGTu32 : PTX_SETP; -defm SETPGEu32 : PTX_SETP; -defm SETPEQu64 : PTX_SETP; -defm SETPNEu64 : PTX_SETP; -defm SETPLTu64 : PTX_SETP; -defm SETPLEu64 : PTX_SETP; -defm SETPGTu64 : PTX_SETP; -defm SETPGEu64 : PTX_SETP; +// Compare u16 + +defm SETPEQu16 : PTX_SETP_I; +defm SETPNEu16 : PTX_SETP_I; +defm SETPLTu16 : PTX_SETP_I; +defm SETPLEu16 : PTX_SETP_I; +defm SETPGTu16 : PTX_SETP_I; +defm SETPGEu16 : PTX_SETP_I; + +// Compare u32 + +defm SETPEQu32 : PTX_SETP_I; +defm SETPNEu32 : PTX_SETP_I; +defm SETPLTu32 : PTX_SETP_I; +defm SETPLEu32 : PTX_SETP_I; +defm SETPGTu32 : PTX_SETP_I; +defm SETPGEu32 : PTX_SETP_I; + +// Compare u64 + +defm SETPEQu64 : PTX_SETP_I; +defm SETPNEu64 : PTX_SETP_I; +defm SETPLTu64 : PTX_SETP_I; +defm SETPLEu64 : PTX_SETP_I; +defm SETPGTu64 : PTX_SETP_I; +defm SETPGEu64 : PTX_SETP_I; + +// Compare f32 + +defm SETPEQf32 : PTX_SETP_FP; +defm SETPNEf32 : PTX_SETP_FP; +defm SETPLTf32 : PTX_SETP_FP; +defm SETPLEf32 : PTX_SETP_FP; +defm SETPGTf32 : PTX_SETP_FP; +defm SETPGEf32 : PTX_SETP_FP; + +// Compare f64 + +defm SETPEQf64 : PTX_SETP_FP; +defm SETPNEf64 : PTX_SETP_FP; +defm SETPLTf64 : PTX_SETP_FP; +defm SETPLEf64 : PTX_SETP_FP; +defm SETPGTf64 : PTX_SETP_FP; +defm SETPGEf64 : PTX_SETP_FP; ///===- Logic and Shift Instructions --------------------------------------===// @@ -654,18 +754,138 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>; // defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; // TODO: Do something with st.param if/when it is needed. +// Conversion to pred + +def CVT_pred_u16 + : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a", + [(set Preds:$d, (trunc RRegu16:$a))]>; + def CVT_pred_u32 : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a", [(set Preds:$d, (trunc RRegu32:$a))]>; +def CVT_pred_u64 + : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a", + [(set Preds:$d, (trunc RRegu64:$a))]>; + +def CVT_pred_f32 + : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.pred.f32\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_pred_f64 + : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.pred.f64\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u16 + +def CVT_u16_pred + : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a", + [(set RRegu16:$d, (zext Preds:$a))]>; + +def CVT_u16_u32 + : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a", + [(set RRegu16:$d, (trunc RRegu32:$a))]>; + +def CVT_u16_u64 + : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a", + [(set RRegu16:$d, (trunc RRegu64:$a))]>; + +def CVT_u16_f32 + : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.u16.f32\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u16_f64 + : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.u16.f64\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u32 + def CVT_u32_pred : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", [(set RRegu32:$d, (zext Preds:$a))]>; +def CVT_u32_u16 + : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a", + [(set RRegu32:$d, (zext RRegu16:$a))]>; + +def CVT_u32_u64 + : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a", + [(set RRegu32:$d, (trunc RRegu64:$a))]>; + +def CVT_u32_f32 + : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.u32.f32\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u32_f64 + : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.u32.f64\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u64 + +def CVT_u64_pred + : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a", + [(set RRegu64:$d, (zext Preds:$a))]>; + +def CVT_u64_u16 + : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a", + [(set RRegu64:$d, (zext RRegu16:$a))]>; + def CVT_u64_u32 : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a", [(set RRegu64:$d, (zext RRegu32:$a))]>; +def CVT_u64_f32 + : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.u64.f32\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u64_f64 + : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.u64.f32\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to f32 + +def CVT_f32_pred + : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.f32.pred\t$d, $a", + [(set RRegf32:$d, (uint_to_fp Preds:$a))]>; + +def CVT_f32_u16 + : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.f32.u16\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f32_u32 + : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.f32.u32\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f32_u64 + : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.f32.u64\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f32_f64 + : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.f32.f64\t$d, $a", + [(set RRegf32:$d, (fround RRegf64:$a))]>; + +// Conversion to f64 + +def CVT_f64_pred + : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.f64.pred\t$d, $a", + [(set RRegf64:$d, (uint_to_fp Preds:$a))]>; + +def CVT_f64_u16 + : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.f64.u16\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f64_u32 + : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.f64.u32\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f64_u64 + : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.f64.u64\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f64_f32 + : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a", + [(set RRegf64:$d, (fextend RRegf32:$a))]>; + ///===- Control Flow Instructions -----------------------------------------===// let isBranch = 1, isTerminator = 1, isBarrier = 1 in {