X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=utils%2FPerfectShuffle%2FPerfectShuffle.cpp;h=f80d88563168f5219dfecd1627012e9d95dcaf66;hb=00552e3875ee5f382db6c98286a241a7d0efe1b8;hp=e7b9421a589e35d7498a6104183cdd548111cff8;hpb=3060910e290949a9ac5eda8726d030790c4d60ff;p=oota-llvm.git diff --git a/utils/PerfectShuffle/PerfectShuffle.cpp b/utils/PerfectShuffle/PerfectShuffle.cpp index e7b9421a589..f80d8856316 100644 --- a/utils/PerfectShuffle/PerfectShuffle.cpp +++ b/utils/PerfectShuffle/PerfectShuffle.cpp @@ -14,18 +14,19 @@ // //===----------------------------------------------------------------------===// +#include +#include +#include #include #include -#include - struct Operator; // Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes -// an element from that value of the input vectors. A value of 8 means the +// an element from that value of the input vectors. A value of 8 means the // entry is undefined. // Mask manipulation functions. -static inline unsigned short MakeMask(unsigned V0, unsigned V1, +static inline unsigned short MakeMask(unsigned V0, unsigned V1, unsigned V2, unsigned V3) { return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4)); } @@ -70,7 +71,7 @@ static unsigned short getLHSOnlyMask(unsigned short Mask) { /// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4 /// bits) into a compressed 13-bit mask, where each elt is multiplied by 9. static unsigned getCompressedMask(unsigned short Mask) { - return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 + + return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 + getMaskElt(Mask, 2)*9 + getMaskElt(Mask, 3); } @@ -87,7 +88,7 @@ struct ShuffleVal { unsigned Cost; // Number of instrs used to generate this value. Operator *Op; // The Operation used to generate this value. unsigned short Arg0, Arg1; // Input operands for this value. - + ShuffleVal() : Cost(1000000) {} }; @@ -104,22 +105,25 @@ struct Operator { unsigned short ShuffleMask; unsigned short OpNum; const char *Name; - - Operator(unsigned short shufflemask, const char *name, unsigned opnum) - : ShuffleMask(shufflemask), OpNum(opnum), Name(name) { + unsigned Cost; + + Operator(unsigned short shufflemask, const char *name, unsigned opnum, + unsigned cost = 1) + : ShuffleMask(shufflemask), OpNum(opnum), Name(name), Cost(cost) { TheOperators.push_back(this); } ~Operator() { assert(TheOperators.back() == this); TheOperators.pop_back(); } - + bool isOnlyLHSOperator() const { return isOnlyLHSMask(ShuffleMask); } - + const char *getName() const { return Name; } - + unsigned getCost() const { return Cost; } + unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) { // Extract the elements from LHSMask and RHSMask, as appropriate. unsigned Result = 0; @@ -156,7 +160,7 @@ static void PrintOperation(unsigned ValNo, unsigned short Vals[]) { std::cerr << "t" << ValNo; PrintMask(ThisOp, std::cerr); std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "("; - + if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) { std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0); PrintMask(ShufTab[ThisOp].Arg0, std::cerr); @@ -168,7 +172,7 @@ static void PrintOperation(unsigned ValNo, unsigned short Vals[]) { break; } } - + if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) { std::cerr << ", "; if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) { @@ -193,21 +197,21 @@ static unsigned getNumEntered() { return Count; } -static void EvaluateOps(unsigned short Elt, unsigned short Vals[], +static void EvaluateOps(unsigned short Elt, unsigned short Vals[], unsigned &NumVals) { if (ShufTab[Elt].Cost == 0) return; // If this value has already been evaluated, it is free. FIXME: match undefs. for (unsigned i = 0, e = NumVals; i != e; ++i) if (Vals[i] == Elt) return; - + // Otherwise, get the operands of the value, then add it. unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1; if (ShufTab[Arg0].Cost) EvaluateOps(Arg0, Vals, NumVals); if (Arg0 != Arg1 && ShufTab[Arg1].Cost) EvaluateOps(Arg1, Vals, NumVals); - + Vals[NumVals++] = Elt; } @@ -215,12 +219,12 @@ static void EvaluateOps(unsigned short Elt, unsigned short Vals[], int main() { // Seed the table with accesses to the LHS and RHS. ShufTab[0x0123].Cost = 0; - ShufTab[0x0123].Op = 0; + ShufTab[0x0123].Op = nullptr; ShufTab[0x0123].Arg0 = 0x0123; ShufTab[0x4567].Cost = 0; - ShufTab[0x4567].Op = 0; + ShufTab[0x4567].Op = nullptr; ShufTab[0x4567].Arg0 = 0x4567; - + // Seed the first-level of shuffles, shuffles whose inputs are the input to // the vectorshuffle operation. bool MadeChange = true; @@ -230,7 +234,7 @@ int main() { ++OpCount; std::cerr << "Starting iteration #" << OpCount << " with " << getNumEntered() << " entries established.\n"; - + // Scan the table for two reasons: First, compute the maximum cost of any // operation left in the table. Second, make sure that values with undefs // have the cheapest alternative that they match. @@ -239,7 +243,7 @@ int main() { if (!isValidMask(i)) continue; if (ShufTab[i].Cost > MaxCost) MaxCost = ShufTab[i].Cost; - + // If this value has an undef, make it be computed the cheapest possible // way of any of the things that it matches. if (hasUndefElements(i)) { @@ -266,10 +270,10 @@ int main() { UndefIdx = 3; else abort(); - + unsigned MinVal = i; unsigned MinCost = ShufTab[i].Cost; - + // Scan the 8 entries. for (unsigned j = 0; j != 8; ++j) { unsigned NewElt = setMaskElt(i, UndefIdx, j); @@ -278,15 +282,15 @@ int main() { MinVal = NewElt; } } - + // If we found something cheaper than what was here before, use it. if (i != MinVal) { MadeChange = true; ShufTab[i] = ShufTab[MinVal]; } - } + } } - + for (unsigned LHS = 0; LHS != 0x8889; ++LHS) { if (!isValidMask(LHS)) continue; if (ShufTab[LHS].Cost > 1000) continue; @@ -295,14 +299,14 @@ int main() { // we already have, don't consider it. if (ShufTab[LHS].Cost + 1 >= MaxCost) continue; - + for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) { Operator *Op = TheOperators[opnum]; // Evaluate op(LHS,LHS) unsigned ResultMask = Op->getTransformedMask(LHS, LHS); - unsigned Cost = ShufTab[LHS].Cost + 1; + unsigned Cost = ShufTab[LHS].Cost + Op->getCost(); if (Cost < ShufTab[ResultMask].Cost) { ShufTab[ResultMask].Cost = Cost; ShufTab[ResultMask].Op = Op; @@ -310,20 +314,20 @@ int main() { ShufTab[ResultMask].Arg1 = LHS; MadeChange = true; } - + // If this is a two input instruction, include the op(x,y) cases. If // this is a one input instruction, skip this. if (Op->isOnlyLHSOperator()) continue; - + for (unsigned RHS = 0; RHS != 0x8889; ++RHS) { if (!isValidMask(RHS)) continue; if (ShufTab[RHS].Cost > 1000) continue; - + // If nothing involving this operand could possibly be cheaper than // what we already have, don't consider it. if (ShufTab[RHS].Cost + 1 >= MaxCost) continue; - + // Evaluate op(LHS,RHS) unsigned ResultMask = Op->getTransformedMask(LHS, RHS); @@ -332,7 +336,7 @@ int main() { ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost || ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost) continue; - + // Figure out the cost to evaluate this, knowing that CSE's only need // to be evaluated once. unsigned short Vals[30]; @@ -340,7 +344,7 @@ int main() { EvaluateOps(LHS, Vals, NumVals); EvaluateOps(RHS, Vals, NumVals); - unsigned Cost = NumVals + 1; + unsigned Cost = NumVals + Op->getCost(); if (Cost < ShufTab[ResultMask].Cost) { ShufTab[ResultMask].Cost = Cost; ShufTab[ResultMask].Op = Op; @@ -352,10 +356,10 @@ int main() { } } } - + std::cerr << "Finished Table has " << getNumEntered() << " entries established.\n"; - + unsigned CostArray[10] = { 0 }; // Compute a cost histogram. @@ -366,38 +370,38 @@ int main() { else ++CostArray[ShufTab[i].Cost]; } - + for (unsigned i = 0; i != 9; ++i) if (CostArray[i]) std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n"; if (CostArray[9]) std::cout << "// " << CostArray[9] << " entries have higher cost!\n"; - - + + // Build up the table to emit. std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n"; std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n"; - + for (unsigned i = 0; i != 0x8889; ++i) { if (!isValidMask(i)) continue; - + // CostSat - The cost of this operation saturated to two bits. unsigned CostSat = ShufTab[i].Cost; if (CostSat > 4) CostSat = 4; if (CostSat == 0) CostSat = 1; --CostSat; // Cost is now between 0-3. - + unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0; assert(OpNum < 16 && "Too few bits to encode operation!"); - + unsigned LHS = getCompressedMask(ShufTab[i].Arg0); unsigned RHS = getCompressedMask(ShufTab[i].Arg1); - + // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of // LHS, and 13 bits of RHS = 32 bits. unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS; - std::cout << " " << Val << "U,\t// "; + std::cout << " " << std::setw(10) << Val << "U, // "; PrintMask(i, std::cout); std::cout << ": Cost " << ShufTab[i].Cost; std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy"); @@ -417,7 +421,7 @@ int main() { } } std::cout << "\n"; - } + } std::cout << " 0\n};\n"; if (0) { @@ -427,7 +431,7 @@ int main() { if (ShufTab[i].Cost < 1000) { PrintMask(i, std::cerr); std::cerr << " - Cost " << ShufTab[i].Cost << " - "; - + unsigned short Vals[30]; unsigned NumVals = 0; EvaluateOps(i, Vals, NumVals); @@ -441,8 +445,6 @@ int main() { } -#define GENERATE_ALTIVEC - #ifdef GENERATE_ALTIVEC ///===---------------------------------------------------------------------===// @@ -495,3 +497,76 @@ vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8); vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12); #endif + +#define GENERATE_NEON + +#ifdef GENERATE_NEON +enum { + OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> + OP_VREV, + OP_VDUP0, + OP_VDUP1, + OP_VDUP2, + OP_VDUP3, + OP_VEXT1, + OP_VEXT2, + OP_VEXT3, + OP_VUZPL, // VUZP, left result + OP_VUZPR, // VUZP, right result + OP_VZIPL, // VZIP, left result + OP_VZIPR, // VZIP, right result + OP_VTRNL, // VTRN, left result + OP_VTRNR // VTRN, right result +}; + +struct vrev : public Operator { + vrev() : Operator(0x1032, "vrev", OP_VREV) {} +} the_vrev; + +template +struct vdup : public Operator { + vdup(const char *N, unsigned Opc) + : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {} +}; + +vdup<0> the_vdup0("vdup0", OP_VDUP0); +vdup<1> the_vdup1("vdup1", OP_VDUP1); +vdup<2> the_vdup2("vdup2", OP_VDUP2); +vdup<3> the_vdup3("vdup3", OP_VDUP3); + +template +struct vext : public Operator { + vext(const char *Name, unsigned Opc) + : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) { + } +}; + +vext<1> the_vext1("vext1", OP_VEXT1); +vext<2> the_vext2("vext2", OP_VEXT2); +vext<3> the_vext3("vext3", OP_VEXT3); + +struct vuzpl : public Operator { + vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {} +} the_vuzpl; + +struct vuzpr : public Operator { + vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {} +} the_vuzpr; + +struct vzipl : public Operator { + vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {} +} the_vzipl; + +struct vzipr : public Operator { + vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {} +} the_vzipr; + +struct vtrnl : public Operator { + vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {} +} the_vtrnl; + +struct vtrnr : public Operator { + vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {} +} the_vtrnr; + +#endif