--- /dev/null
+/*\r
+\r
+ Derby - Class org.apache.derby.impl.sql.compile.IntersectNode\r
+\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to you under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+\r
+ */\r
+\r
+package org.apache.derby.impl.sql.compile;\r
+\r
+import org.apache.derby.iapi.reference.ClassName;\r
+\r
+import org.apache.derby.iapi.services.sanity.SanityManager;\r
+import org.apache.derby.iapi.services.classfile.VMOpcode;\r
+import org.apache.derby.iapi.services.compiler.MethodBuilder;\r
+import org.apache.derby.iapi.services.context.ContextManager;\r
+\r
+import org.apache.derby.iapi.error.StandardException;\r
+\r
+import org.apache.derby.iapi.sql.compile.NodeFactory;\r
+import org.apache.derby.iapi.sql.compile.Optimizable;\r
+import org.apache.derby.iapi.sql.compile.OptimizablePredicate;\r
+import org.apache.derby.iapi.sql.compile.OptimizablePredicateList;\r
+import org.apache.derby.iapi.sql.compile.Optimizer;\r
+import org.apache.derby.iapi.sql.compile.CostEstimate;\r
+import org.apache.derby.iapi.sql.compile.RowOrdering;\r
+import org.apache.derby.iapi.sql.compile.C_NodeTypes;\r
+\r
+import org.apache.derby.iapi.sql.dictionary.ConglomerateDescriptor;\r
+\r
+import org.apache.derby.iapi.reference.SQLState;\r
+\r
+import org.apache.derby.iapi.types.DataTypeDescriptor;\r
+\r
+import org.apache.derby.iapi.util.JBitSet;\r
+import org.apache.derby.iapi.util.ReuseFactory;\r
+\r
+import java.sql.Types;\r
+\r
+import java.util.BitSet;\r
+\r
+/**\r
+ * A IntersectOrExceptNode represents an INTERSECT or EXCEPT DML statement.\r
+ *\r
+ */\r
+\r
+public class IntersectOrExceptNode extends SetOperatorNode\r
+{\r
+ /* Currently we implement INTERSECT and EXCEPT by rewriting\r
+ * t1 (INTERSECT|EXCEPT) [ALL] t2\r
+ * as (roughly)\r
+ * setOpResultSet( opType, all, (select * from t1 order by 1,2,...n), (select * from t2 ORDER BY 1,2,...,n))\r
+ * where n is the number of columns in t1 (which must be the same as the number of columns in t2),\r
+ * and opType is INTERSECT, or EXCEPT.\r
+ *\r
+ * The setOpResultSet result set simultaneously scans through its two ordered inputs and\r
+ * performs the intersect or except.\r
+ *\r
+ * There are other query plans that may be more efficient, depending on the sizes. One plan is\r
+ * to make a hash table from one of the input tables and then look up each row of the other input\r
+ * table in the hash table. However, we have not yet implemented spilling to disk in the\r
+ * BackingStoreHashtable class: currently the whole hash table is in RAM. If we were to use it\r
+ * we would blow up on large input tables.\r
+ */\r
+\r
+ private int opType;\r
+ public static final int INTERSECT_OP = 1;\r
+ public static final int EXCEPT_OP = 2;\r
+\r
+ /* Only optimize it once */\r
+ /* Only call addNewNodes() once */\r
+ private boolean addNewNodesCalled;\r
+\r
+ private int[] intermediateOrderByColumns; // The input result sets will be ordered on these columns. 0 indexed\r
+ private int[] intermediateOrderByDirection; // ascending = 1, descending = -1\r
+\r
+ /**\r
+ * Initializer for a SetOperatorNode.\r
+ *\r
+ * @param leftResult The ResultSetNode on the left side of this union\r
+ * @param rightResult The ResultSetNode on the right side of this union\r
+ * @param all Whether or not this is an ALL.\r
+ * @param tableProperties Properties list associated with the table\r
+ *\r
+ * @exception StandardException Thrown on error\r
+ */\r
+\r
+ public void init( Object opType,\r
+ Object leftResult,\r
+ Object rightResult,\r
+ Object all,\r
+ Object tableProperties)\r
+ throws StandardException\r
+ {\r
+ super.init( leftResult, rightResult, all, tableProperties);\r
+ this.opType = ((Integer) opType).intValue();\r
+ }\r
+\r
+ private int getOpType()\r
+ {\r
+ return opType;\r
+ }\r
+ \r
+ /**\r
+ * Push order by lists down to the children so that we can implement the intersect/except\r
+ * by scan of the two sorted inputs.\r
+ *\r
+ * @param numTables Number of tables in the DML Statement\r
+ * @param gbl The group by list, if any\r
+ * @param fromList The from list, if any\r
+ *\r
+ * @return The preprocessed ResultSetNode that can be optimized\r
+ *\r
+ * @exception StandardException Thrown on error\r
+ */\r
+\r
+ public ResultSetNode preprocess(int numTables,\r
+ GroupByList gbl,\r
+ FromList fromList)\r
+ throws StandardException\r
+ {\r
+ // RESOLVE: We are in a quandary as to when and how we should generate order by lists. SelectNode processing\r
+ // requires order by lists at the start of preprocess. That is why we are doing it here. However we can\r
+ // pick any column ordering. Depending on the child expressions the optimizer may be able to avoid a\r
+ // sort if we pick the right column ordering. For instance if one of the child expressions is\r
+ // "select <key columns>, <other expressions> from T" where there is a unique index on <key columns>\r
+ // then we can just generate an order by on the key columns and the optimizer should use the unique index\r
+ // to produce the sorted result set. However the ResultSetNode class does not make it easy to\r
+ // find the structure of the query expression. Furthermore we most want to avoid a sort on the larger\r
+ // input, but the size estimate is not available at preprocess time.\r
+\r
+ intermediateOrderByColumns = new int[ getResultColumns().size()];\r
+ intermediateOrderByDirection = new int[ intermediateOrderByColumns.length];\r
+ /* If there is an order by on the result of the intersect then use that because we know that doing so\r
+ * will avoid a sort. If the output of the intersect/except is small relative to its inputs then in some\r
+ * cases it would be better to sort the inputs on a different sequence of columns, but it is hard to analyze\r
+ * the input query expressions to see if a sort can be avoided.\r
+ */\r
+ if( orderByList != null)\r
+ {\r
+ BitSet colsOrdered = new BitSet( intermediateOrderByColumns.length);\r
+ int orderByListSize = orderByList.size();\r
+ int intermediateOrderByIdx = 0;\r
+ for( int i = 0; i < orderByListSize; i++)\r
+ {\r
+ if( colsOrdered.get(i))\r
+ continue;\r
+ OrderByColumn orderByColumn = orderByList.getOrderByColumn(i);\r
+ intermediateOrderByDirection[intermediateOrderByIdx] = orderByColumn.isAscending() ? 1 : -1;\r
+ int columnIdx = orderByColumn.getResultColumn().getColumnPosition() - 1;\r
+ intermediateOrderByColumns[intermediateOrderByIdx] = columnIdx;\r
+ colsOrdered.set( columnIdx);\r
+ intermediateOrderByIdx++;\r
+ }\r
+ for( int i = 0; i < intermediateOrderByColumns.length; i++)\r
+ {\r
+ if( ! colsOrdered.get(i))\r
+ {\r
+ intermediateOrderByDirection[intermediateOrderByIdx] = 1;\r
+ intermediateOrderByColumns[intermediateOrderByIdx] = i;\r
+ intermediateOrderByIdx++;\r
+ }\r
+ }\r
+ orderByList = null; // It will be pushed down.\r
+ }\r
+ else // The output of the intersect/except does not have to be ordered\r
+ {\r
+ // Pick an intermediate ordering that minimizes the cost.\r
+ // RESOLVE: how do you do that?\r
+ for( int i = 0; i < intermediateOrderByColumns.length; i++)\r
+ {\r
+ intermediateOrderByDirection[i] = 1;\r
+ intermediateOrderByColumns[i] = i;\r
+ }\r
+ }\r
+ pushOrderingDown( leftResultSet);\r
+ pushOrderingDown( rightResultSet);\r
+\r
+ return super.preprocess( numTables, gbl, fromList);\r
+ } // end of preprocess\r
+\r
+ private void pushOrderingDown( ResultSetNode rsn)\r
+ throws StandardException\r
+ {\r
+ ContextManager cm = getContextManager();\r
+ NodeFactory nf = getNodeFactory();\r
+ OrderByList orderByList = (OrderByList) nf.getNode( C_NodeTypes.ORDER_BY_LIST, cm);\r
+ for( int i = 0; i < intermediateOrderByColumns.length; i++)\r
+ {\r
+ OrderByColumn orderByColumn = (OrderByColumn)\r
+ nf.getNode( C_NodeTypes.ORDER_BY_COLUMN,\r
+ nf.getNode(C_NodeTypes.INT_CONSTANT_NODE,\r
+ ReuseFactory.getInteger( intermediateOrderByColumns[i] + 1),\r
+ cm),\r
+ cm);\r
+ if( intermediateOrderByDirection[i] < 0)\r
+ orderByColumn.setDescending();\r
+ orderByList.addOrderByColumn( orderByColumn);\r
+ }\r
+ orderByList.bindOrderByColumns( rsn);\r
+ rsn.pushOrderByList( orderByList);\r
+ } // end of pushOrderingDown\r
+ \r
+ /**\r
+ * @see org.apache.derby.iapi.sql.compile.Optimizable#estimateCost\r
+ */\r
+ public CostEstimate estimateCost( OptimizablePredicateList predList,\r
+ ConglomerateDescriptor cd,\r
+ CostEstimate outerCost,\r
+ Optimizer optimizer,\r
+ RowOrdering rowOrdering)\r
+ throws StandardException\r
+ {\r
+ leftResultSet = optimizeSource(\r
+ optimizer,\r
+ leftResultSet,\r
+ (PredicateList) null,\r
+ outerCost);\r
+\r
+ rightResultSet = optimizeSource(\r
+ optimizer,\r
+ rightResultSet,\r
+ (PredicateList) null,\r
+ outerCost);\r
+\r
+ CostEstimate costEstimate = getCostEstimate(optimizer);\r
+ CostEstimate leftCostEstimate = leftResultSet.getCostEstimate();\r
+ CostEstimate rightCostEstimate = rightResultSet.getCostEstimate();\r
+ // The cost is the sum of the two child costs plus the cost of sorting the union.\r
+ costEstimate.setCost( leftCostEstimate.getEstimatedCost() + rightCostEstimate.getEstimatedCost(),\r
+ getRowCountEstimate( leftCostEstimate.rowCount(),\r
+ rightCostEstimate.rowCount()),\r
+ getSingleScanRowCountEstimate( leftCostEstimate.singleScanRowCount(),\r
+ rightCostEstimate.singleScanRowCount()));\r
+\r
+ return costEstimate;\r
+ } // End of estimateCost\r
+\r
+ /**\r
+ * @see Optimizable#modifyAccessPath\r
+ *\r
+ * @exception StandardException Thrown on error\r
+ */\r
+ public Optimizable modifyAccessPath(JBitSet outerTables) throws StandardException\r
+ {\r
+ Optimizable retOptimizable;\r
+ retOptimizable = super.modifyAccessPath(outerTables);\r
+\r
+ /* We only want call addNewNodes() once */\r
+ if (addNewNodesCalled)\r
+ {\r
+ return retOptimizable;\r
+ }\r
+ return (Optimizable) addNewNodes();\r
+ }\r
+\r
+ /**\r
+ * @see ResultSetNode#modifyAccessPaths\r
+ *\r
+ * @exception StandardException Thrown on error\r
+ */\r
+ public ResultSetNode modifyAccessPaths() throws StandardException\r
+ {\r
+ ResultSetNode retRSN;\r
+ retRSN = super.modifyAccessPaths();\r
+\r
+ /* We only want call addNewNodes() once */\r
+ if (addNewNodesCalled)\r
+ {\r
+ return retRSN;\r
+ }\r
+ return addNewNodes();\r
+ }\r
+\r
+ /**\r
+ * Add any new ResultSetNodes that are necessary to the tree.\r
+ * We wait until after optimization to do this in order to\r
+ * make it easier on the optimizer.\r
+ *\r
+ * @return (Potentially new) head of the ResultSetNode tree.\r
+ *\r
+ * @exception StandardException Thrown on error\r
+ */\r
+ private ResultSetNode addNewNodes()\r
+ throws StandardException\r
+ {\r
+ /* Only call addNewNodes() once */\r
+ if (addNewNodesCalled)\r
+ {\r
+ return this;\r
+ }\r
+\r
+ addNewNodesCalled = true;\r
+\r
+ if( orderByList == null)\r
+ return this;\r
+ // Generate an order by node on top of the intersect/except\r
+ return (ResultSetNode) getNodeFactory().getNode( C_NodeTypes.ORDER_BY_NODE,\r
+ this,\r
+ orderByList,\r
+ tableProperties,\r
+ getContextManager());\r
+ } // end of addNewNodes\r
+\r
+ /**\r
+ * Generate the code.\r
+ *\r
+ * @exception StandardException Thrown on error\r
+ */\r
+ public void generate( ActivationClassBuilder acb,\r
+ MethodBuilder mb)\r
+ throws StandardException\r
+ {\r
+\r
+ /* Get the next ResultSet #, so that we can number this ResultSetNode, its\r
+ * ResultColumnList and ResultSet.\r
+ */\r
+ assignResultSetNumber();\r
+\r
+ // Get our final cost estimate based on the child estimates.\r
+ costEstimate = getFinalCostEstimate();\r
+\r
+ // build up the tree.\r
+\r
+ /* Generate the SetOpResultSet. Arguments:\r
+ * 1) expression for left child ResultSet\r
+ * 2) expression for right child ResultSet\r
+ * 3) activation\r
+ * 4) resultSetNumber\r
+ * 5) estimated row count\r
+ * 6) estimated cost\r
+ * 7) opType\r
+ * 8) all\r
+ * 9) close method\r
+ * 10) intermediateOrderByColumns saved object index\r
+ * 11) intermediateOrderByDirection saved object index\r
+ */\r
+\r
+ acb.pushGetResultSetFactoryExpression(mb); // instance for getSetOpResultSet\r
+\r
+ getLeftResultSet().generate( acb, mb);\r
+ getRightResultSet().generate( acb, mb);\r
+\r
+ acb.pushThisAsActivation(mb);\r
+ mb.push(resultSetNumber);\r
+ mb.push( costEstimate.getEstimatedRowCount());\r
+ mb.push( costEstimate.getEstimatedCost());\r
+ mb.push( getOpType());\r
+ mb.push( all);\r
+ mb.push( getCompilerContext().addSavedObject( intermediateOrderByColumns));\r
+ mb.push( getCompilerContext().addSavedObject( intermediateOrderByDirection));\r
+\r
+ mb.callMethod(VMOpcode.INVOKEINTERFACE,\r
+ (String) null,\r
+ "getSetOpResultSet",\r
+ ClassName.NoPutResultSet, 10);\r
+ } // end of generate\r
+\r
+ /**\r
+ * @see ResultSetNode#getFinalCostEstimate\r
+ *\r
+ * Get the final CostEstimate for this IntersectOrExceptNode.\r
+ *\r
+ * @return The final CostEstimate for this IntersectOrExceptNode,\r
+ * which is the sum of the two child costs. The final number of\r
+ * rows depends on whether this is an INTERSECT or EXCEPT (see\r
+ * getRowCountEstimate() in this class for more).\r
+ */\r
+ public CostEstimate getFinalCostEstimate()\r
+ throws StandardException\r
+ {\r
+ if (finalCostEstimate != null)\r
+ return finalCostEstimate;\r
+\r
+ CostEstimate leftCE = leftResultSet.getFinalCostEstimate();\r
+ CostEstimate rightCE = rightResultSet.getFinalCostEstimate();\r
+\r
+ finalCostEstimate = getNewCostEstimate();\r
+ finalCostEstimate.setCost(\r
+ leftCE.getEstimatedCost() + rightCE.getEstimatedCost(),\r
+ getRowCountEstimate(leftCE.rowCount(), rightCE.rowCount()),\r
+ getSingleScanRowCountEstimate(leftCE.singleScanRowCount(),\r
+ rightCE.singleScanRowCount()));\r
+\r
+ return finalCostEstimate;\r
+ }\r
+\r
+ String getOperatorName()\r
+ {\r
+ switch( opType)\r
+ {\r
+ case INTERSECT_OP:\r
+ return "INTERSECT";\r
+\r
+ case EXCEPT_OP:\r
+ return "EXCEPT";\r
+ }\r
+ if( SanityManager.DEBUG)\r
+ SanityManager.THROWASSERT( "Invalid intersectOrExcept opType: " + opType);\r
+ return "?";\r
+ }\r
+ \r
+ double getRowCountEstimate( double leftRowCount, double rightRowCount)\r
+ {\r
+ switch( opType)\r
+ {\r
+ case INTERSECT_OP:\r
+ // The result has at most min( leftRowCount, rightRowCount). Estimate the actual row count at\r
+ // half that.\r
+ return Math.min( leftRowCount, rightRowCount)/2;\r
+\r
+ case EXCEPT_OP:\r
+ // The result has at most leftRowCount rows and at least\r
+ // max(0, leftRowCount - rightRowCount) rows. Use the mean\r
+ // of those two as the estimate.\r
+ return (leftRowCount + Math.max(0, leftRowCount - rightRowCount))/2;\r
+ }\r
+ if( SanityManager.DEBUG)\r
+ SanityManager.THROWASSERT( "Invalid intersectOrExcept opType: " + opType);\r
+ return 1.0;\r
+ } // end of getRowCountEstimate\r
+ \r
+ double getSingleScanRowCountEstimate( double leftSingleScanRowCount, double rightSingleScanRowCount)\r
+ {\r
+ return getRowCountEstimate( leftSingleScanRowCount, rightSingleScanRowCount);\r
+ }\r
+}\r