From b2322561cbc79ff2989dc5622ec3063502472337 Mon Sep 17 00:00:00 2001 From: Duraid Madina Date: Tue, 26 Apr 2005 07:23:02 +0000 Subject: [PATCH] * Add code to reduce multiplies by constant integers to shifts, adds and subtracts. This is a very rough and nasty implementation of Lefevre's "pattern finding" algorithm. With a few small changes though, it should end up beating most other methods in common use, regardless of the size of the constant (currently, it's often one or two shifts worse) TODO: rewrite it so it's not hideously ugly (this is a translation from perl, which doesn't help ;) bypass most of it for multiplies by 2^n+1 (eventually) teach it that some combinations of shift+add are cheaper than others (e.g. shladd on ia64, scaled adds on alpha) get it to try multiple booth encodings in search of the cheapest routine make it work for negative constants This is hacked up as a DAG->DAG transform, so once I clean it up I hope it'll be pulled out of here and put somewhere else. The only thing backends should really have to worry about for now is where to draw the line between using this code vs. going ahead and doing an integer multiply anyway. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@21560 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/IA64/IA64ISelPattern.cpp | 454 +++++++++++++++++++++++++++- 1 file changed, 439 insertions(+), 15 deletions(-) diff --git a/lib/Target/IA64/IA64ISelPattern.cpp b/lib/Target/IA64/IA64ISelPattern.cpp index 7c03a45bca5..20189e1999c 100644 --- a/lib/Target/IA64/IA64ISelPattern.cpp +++ b/lib/Target/IA64/IA64ISelPattern.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/ADT/Statistic.h" #include +#include #include using namespace llvm; @@ -412,6 +413,9 @@ namespace { /// IA64Lowering - This object fully describes how to lower LLVM code to an /// IA64-specific SelectionDAG. IA64TargetLowering IA64Lowering; + SelectionDAG *ISelDAG; // Hack to support us having a dag->dag transform + // for sdiv and udiv until it is put into the future + // dag combiner /// ExprMap - As shared expressions are codegen'd, we keep track of which /// vreg the value is produced in, so we only emit one copy of each compiled @@ -420,8 +424,8 @@ namespace { std::set LoweredTokens; public: - ISel(TargetMachine &TM) : SelectionDAGISel(IA64Lowering), IA64Lowering(TM) { - } + ISel(TargetMachine &TM) : SelectionDAGISel(IA64Lowering), IA64Lowering(TM), + ISelDAG(0) { } /// InstructionSelectBasicBlock - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. @@ -429,6 +433,9 @@ namespace { unsigned SelectExpr(SDOperand N); void Select(SDOperand N); + // a dag->dag to transform mul-by-constant-int to shifts+adds/subs + SDOperand BuildConstmulSequence(SDOperand N); + }; } @@ -437,11 +444,419 @@ namespace { void ISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { // Codegen the basic block. + ISelDAG = &DAG; Select(DAG.getRoot()); // Clear state used for selection. ExprMap.clear(); LoweredTokens.clear(); + ISelDAG = 0; +} + +const char sign[2]={'+','-'}; + + +// strip leading '0' characters from a string +void munchLeadingZeros(std::string& inString) { + while(inString.c_str()[0]=='0') { + inString.erase(0, 1); + } +} + +// strip trailing '0' characters from a string +void munchTrailingZeros(std::string& inString) { + int curPos=inString.length()-1; + + while(inString.c_str()[curPos]=='0') { + inString.erase(curPos, 1); + curPos--; + } +} + +// return how many consecutive '0' characters are at the end of a string +unsigned int countTrailingZeros(std::string& inString) { + int curPos=inString.length()-1; + unsigned int zeroCount=0; + // assert goes here + while(inString.c_str()[curPos--]=='0') { + zeroCount++; + } + return zeroCount; +} + +// booth encode a string of '1' and '0' characters (returns string of 'P' (+1) +// '0' and 'N' (-1) characters) +void boothEncode(std::string inString, std::string& boothEncodedString) { + + int curpos=0; + int replacements=0; + int lim=inString.size(); + + while(curpos1) { + inString.replace(curpos, runlength+1, replaceString); + curpos+=runlength-1; + } else + curpos++; + } else { // a zero, we just keep chugging along + curpos++; + } + } + + // clean up (trim the string, reverse it and turn '1's into 'P's) + munchTrailingZeros(inString); + boothEncodedString=""; + + for(int i=inString.size()-1;i>=0;i--) + if(inString[i]=='1') + boothEncodedString+="P"; + else + boothEncodedString+=inString[i]; + +} + +struct shiftaddblob { // this encodes stuff like (x=) "A << B [+-] C << D" + unsigned firstVal; // A + unsigned firstShift; // B + unsigned secondVal; // C + unsigned secondShift; // D + bool isSub; +}; + +/* this implements Lefevre's "pattern-based" constant multiplication, + * see "Multiplication by an Integer Constant", INRIA report 1999-06 + * + * TODO: implement a method to try rewriting P0N<->0PP / N0P<->0NN + * to get better booth encodings - this does help in practice + * TODO: weight shifts appropriately (most architectures can't + * fuse a shift and an add for arbitrary shift amounts) */ +unsigned lefevre(const std::string inString, + std::vector &ops) { + std::string retstring; + std::string s = inString; + munchTrailingZeros(s); + + int length=s.length()-1; + + if(length==0) { + return(0); + } + + std::vector p,n; + + for(int i=0; i<=length; i++) { + if (s.c_str()[length-i]=='P') { + p.push_back(i); + } else if (s.c_str()[length-i]=='N') { + n.push_back(i); + } + } + + std::string t, u; + int c,f; + std::map w; + + for(int i=0; i::const_iterator ii; + std::vector d; + std::multimap sorted_by_value; + + for(ii = w.begin(); ii!=w.end(); ii++) + sorted_by_value.insert(std::pair((*ii).second,(*ii).first)); + + for (std::multimap::iterator it = sorted_by_value.begin(); + it != sorted_by_value.end(); ++it) { + d.push_back((*it).second); + } + + int int_W=0; + int int_d; + + while(d.size()>0 && (w[int_d=d.back()] > int_W)) { + d.pop_back(); + retstring=s; // hmmm + int x=0; + int z=abs(int_d)-1; + + if(int_d>0) { + + for(int base=0; baseint_W) { + int_W = x; + t = retstring; + c = int_d; // tofix + } + + } d.pop_back(); // hmm + + u = t; + + for(int i=0; i(N.getOperand(1))->getValue(); + + bool flippedSign; + unsigned preliminaryShift=0; + + assert(constant > 0 && "erk, don't multiply by zero or negative nums\n"); + + // first, we make the constant to multiply by positive + if(constant<0) { + constant=-constant; + flippedSign=true; + } else { + flippedSign=false; + } + + // next, we make it odd. + for(; (constant%2==0); preliminaryShift++) + constant>>=1; + + //OK, we have a positive, odd number of 64 bits or less. Convert it + //to a binary string, constantString[0] is the LSB + char constantString[65]; + for(int i=0; i<64; i++) + constantString[i]='0'+((constant>>i)&0x1); + constantString[64]=0; + + // now, Booth encode it + std::string boothEncodedString; + boothEncode(constantString, boothEncodedString); + + std::vector ops; + // do the transformation, filling out 'ops' + lefevre(boothEncodedString, ops); + + SDOperand results[ops.size()]; // temporary results (of adds/subs of shifts) + + // now turn 'ops' into DAG bits + for(int i=0; igetConstant(ops[i].firstShift, MVT::i64); + SDOperand val = (ops[i].firstVal == 0) ? N.getOperand(0) : + results[ops[i].firstVal-1]; + SDOperand left = ISelDAG->getNode(ISD::SHL, MVT::i64, val, amt); + amt = ISelDAG->getConstant(ops[i].secondShift, MVT::i64); + val = (ops[i].secondVal == 0) ? N.getOperand(0) : + results[ops[i].secondVal-1]; + SDOperand right = ISelDAG->getNode(ISD::SHL, MVT::i64, val, amt); + if(ops[i].isSub) + results[i] = ISelDAG->getNode(ISD::SUB, MVT::i64, left, right); + else + results[i] = ISelDAG->getNode(ISD::ADD, MVT::i64, left, right); + } + + // don't forget flippedSign and preliminaryShift! + SDOperand finalresult; + if(preliminaryShift) { + SDOperand finalshift = ISelDAG->getConstant(preliminaryShift, MVT::i64); + finalresult = ISelDAG->getNode(ISD::SHL, MVT::i64, + results[ops.size()-1], finalshift); + } else { // there was no preliminary divide-by-power-of-2 required + finalresult = results[ops.size()-1]; + } + + return finalresult; } /// ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It @@ -909,23 +1324,32 @@ assert(0 && "hmm, ISD::SIGN_EXTEND: shouldn't ever be reached. bad luck!\n"); } case ISD::MUL: { - Tmp1 = SelectExpr(N.getOperand(0)); - Tmp2 = SelectExpr(N.getOperand(1)); if(DestType != MVT::f64) { // TODO: speed! - // boring old integer multiply with xma - unsigned TempFR1=MakeReg(MVT::f64); - unsigned TempFR2=MakeReg(MVT::f64); - unsigned TempFR3=MakeReg(MVT::f64); - BuildMI(BB, IA64::SETFSIG, 1, TempFR1).addReg(Tmp1); - BuildMI(BB, IA64::SETFSIG, 1, TempFR2).addReg(Tmp2); - BuildMI(BB, IA64::XMAL, 1, TempFR3).addReg(TempFR1).addReg(TempFR2) - .addReg(IA64::F0); - BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(TempFR3); + if(N.getOperand(1).getOpcode() != ISD::Constant) { // if not a const mul + // boring old integer multiply with xma + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); + + unsigned TempFR1=MakeReg(MVT::f64); + unsigned TempFR2=MakeReg(MVT::f64); + unsigned TempFR3=MakeReg(MVT::f64); + BuildMI(BB, IA64::SETFSIG, 1, TempFR1).addReg(Tmp1); + BuildMI(BB, IA64::SETFSIG, 1, TempFR2).addReg(Tmp2); + BuildMI(BB, IA64::XMAL, 1, TempFR3).addReg(TempFR1).addReg(TempFR2) + .addReg(IA64::F0); + BuildMI(BB, IA64::GETFSIG, 1, Result).addReg(TempFR3); + return Result; // early exit + } else { // we are multiplying by an integer constant! yay + return Reg = SelectExpr(BuildConstmulSequence(N)); // avert your eyes! + } } - else // floating point multiply + else { // floating point multiply + Tmp1 = SelectExpr(N.getOperand(0)); + Tmp2 = SelectExpr(N.getOperand(1)); BuildMI(BB, IA64::FMPY, 2, Result).addReg(Tmp1).addReg(Tmp2); - return Result; + return Result; + } } case ISD::SUB: { -- 2.34.1