--- /dev/null
+/*\r
+\r
+ Derby - Class org.apache.derby.impl.store.access.sort.ExternalSortFactory\r
+\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to you under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+\r
+ */\r
+\r
+package org.apache.derby.impl.store.access.sort;\r
+\r
+import java.util.Properties;\r
+\r
+import org.apache.derby.iapi.services.monitor.ModuleControl;\r
+import org.apache.derby.iapi.services.monitor.ModuleSupportable;\r
+import org.apache.derby.iapi.services.monitor.Monitor;\r
+import org.apache.derby.iapi.services.property.PropertyUtil;\r
+import org.apache.derby.iapi.services.sanity.SanityManager;\r
+\r
+import org.apache.derby.iapi.error.StandardException;\r
+\r
+import org.apache.derby.iapi.store.access.conglomerate.Sort;\r
+import org.apache.derby.iapi.store.access.conglomerate.SortFactory;\r
+\r
+import org.apache.derby.iapi.store.access.SortObserver;\r
+import org.apache.derby.iapi.store.access.SortCostController;\r
+import org.apache.derby.iapi.store.access.ColumnOrdering;\r
+import org.apache.derby.iapi.store.access.TransactionController;\r
+\r
+import org.apache.derby.iapi.types.DataValueDescriptor;\r
+\r
+import org.apache.derby.iapi.services.uuid.UUIDFactory;\r
+\r
+import org.apache.derby.catalog.UUID;\r
+\r
+// For JavaDoc references (i.e. @see)\r
+import org.apache.derby.iapi.store.access.conglomerate.MethodFactory;\r
+\r
+/**\r
+\r
+**/\r
+\r
+public class ExternalSortFactory implements \r
+ SortFactory, ModuleControl, ModuleSupportable, SortCostController\r
+{\r
+\r
+ private boolean userSpecified; // did the user specify sortBufferMax\r
+ private int defaultSortBufferMax; \r
+ private int sortBufferMax;\r
+\r
+ private static final String IMPLEMENTATIONID = "sort external";\r
+ private static final String FORMATUUIDSTRING = "D2976090-D9F5-11d0-B54D-00A024BF8879";\r
+ private UUID formatUUID = null;\r
+ private static final int DEFAULT_SORTBUFFERMAX = 1024;\r
+ private static final int MINIMUM_SORTBUFFERMAX = 4;\r
+\r
+ protected static final int DEFAULT_MEM_USE = 1024*1024; // aim for about 1Meg\r
+ // how many sort runs to combined into a larger sort run\r
+ // (DERBY-1661)\r
+ protected static final int DEFAULT_MAX_MERGE_RUN = 512; \r
+\r
+ // sizeof Node + reference to Node + 12 bytes tax\r
+ private static final int SORT_ROW_OVERHEAD = 8*4+12; \r
+\r
+\r
+ /*\r
+ ** Methods of MethodFactory\r
+ */\r
+\r
+ /**\r
+ There are no default properties for the external sort..\r
+ @see MethodFactory#defaultProperties\r
+ **/\r
+ public Properties defaultProperties()\r
+ {\r
+ return new Properties();\r
+ }\r
+\r
+ /**\r
+ @see MethodFactory#supportsImplementation\r
+ **/\r
+ public boolean supportsImplementation(String implementationId)\r
+ {\r
+ return implementationId.equals(IMPLEMENTATIONID);\r
+ }\r
+\r
+ /**\r
+ @see MethodFactory#primaryImplementationType\r
+ **/\r
+ public String primaryImplementationType()\r
+ {\r
+ return IMPLEMENTATIONID;\r
+ }\r
+\r
+ /**\r
+ @see MethodFactory#supportsFormat\r
+ **/\r
+ public boolean supportsFormat(UUID formatid)\r
+ {\r
+ return formatid.equals(formatUUID);\r
+ }\r
+\r
+ /**\r
+ @see MethodFactory#primaryFormat\r
+ **/\r
+ public UUID primaryFormat()\r
+ {\r
+ return formatUUID;\r
+ }\r
+\r
+ /*\r
+ ** Methods of SortFactory\r
+ */\r
+\r
+ /**\r
+ Create a sort.\r
+ This method could choose among different sort options, \r
+ depending on the properties etc., but currently it always\r
+ returns a merge sort.\r
+ @see SortFactory#createSort\r
+ **/\r
+ public Sort createSort(\r
+ TransactionController tran,\r
+ int segment,\r
+ Properties implParameters,\r
+ DataValueDescriptor[] template,\r
+ ColumnOrdering columnOrdering[],\r
+ SortObserver sortObserver,\r
+ boolean alreadyInOrder,\r
+ long estimatedRows,\r
+ int estimatedRowSize)\r
+ throws StandardException\r
+ {\r
+ MergeSort sort = new MergeSort();\r
+\r
+ // RESOLVE - mikem change this to use estimatedRows and \r
+ // estimatedRowSize to come up with a smarter number for sortBufferMax\r
+ // than a fixed number of rows. At least 2 possibilities:\r
+ // 1) add sortBufferMaxMem which would be the amount of memory\r
+ // the sorter could use, and then just pick the number of \r
+ // rows as (sortBufferMaxMem / (estimatedRows * estimatedRowSize)\r
+ // 2) add sortBufferUsePercentFree. This would be how much of\r
+ // the current free memory can the current sort use.\r
+ //\r
+\r
+ if (!userSpecified) \r
+ {\r
+ // derby.storage.sortBufferMax is not specified by the\r
+ // user, use default or try to figure out a reasonable sort\r
+ // size.\r
+\r
+ // if we have some idea on row size, set sort approx 1 meg of\r
+ // memory sort.\r
+ if (estimatedRowSize > 0)\r
+ {\r
+ // \r
+ // for each column, there is a reference from the key array and\r
+ // the 4 bytes reference to the column object plus 12 bytes\r
+ // tax on the column object \r
+ // for each row, SORT_ROW_OVERHEAD is the Node and 4 bytes to\r
+ // point to the column array and 4 for alignment\r
+ //\r
+ estimatedRowSize += SORT_ROW_OVERHEAD +\r
+ (template.length*(4+12)) + 8; \r
+ sortBufferMax = DEFAULT_MEM_USE/estimatedRowSize;\r
+ }\r
+ else\r
+ {\r
+ sortBufferMax = defaultSortBufferMax;\r
+ }\r
+ \r
+ // if there are barely more rows than sortBufferMax, use 2\r
+ // smaller runs of similar size instead of one larger run\r
+ //\r
+ // 10% slush is added to estimated Rows to catch the case where\r
+ // estimated rows underestimate the actual number of rows by 10%.\r
+ //\r
+ if (estimatedRows > sortBufferMax &&\r
+ (estimatedRows*1.1) < sortBufferMax*2)\r
+ sortBufferMax = (int)(estimatedRows/2 + estimatedRows/10);\r
+\r
+ // Make sure it is at least the minimum sort buffer size\r
+ if (sortBufferMax < MINIMUM_SORTBUFFERMAX)\r
+ sortBufferMax = MINIMUM_SORTBUFFERMAX;\r
+ }\r
+ else\r
+ {\r
+ // if user specified derby.storage.sortBufferMax, use it.\r
+ sortBufferMax = defaultSortBufferMax;\r
+ }\r
+\r
+ if (SanityManager.DEBUG)\r
+ {\r
+ if (SanityManager.DEBUG_ON("SortTuning"))\r
+ {\r
+ SanityManager.DEBUG("SortTuning",\r
+ "sortBufferMax = " + sortBufferMax + \r
+ " estimatedRows = " + estimatedRows +\r
+ " estimatedRowSize = " + estimatedRowSize +\r
+ " defaultSortBufferMax = " + defaultSortBufferMax);\r
+ }\r
+ }\r
+\r
+ sort.initialize(\r
+ template, columnOrdering, sortObserver, \r
+ alreadyInOrder, estimatedRows, sortBufferMax);\r
+ return sort;\r
+ }\r
+\r
+ /**\r
+ * Return an open SortCostController.\r
+ * <p>\r
+ * Return an open SortCostController which can be used to ask about \r
+ * the estimated costs of SortController() operations.\r
+ * <p>\r
+ *\r
+ * @return The open SortCostController.\r
+ *\r
+ * @exception StandardException Standard exception policy.\r
+ *\r
+ * @see SortCostController\r
+ **/\r
+ public SortCostController openSortCostController()\r
+ throws StandardException\r
+ {\r
+ return(this);\r
+ }\r
+\r
+ /*\r
+ ** Methods of SortCostController\r
+ */\r
+\r
+ public void close()\r
+ {\r
+ // nothing to do.\r
+ }\r
+\r
+ /**\r
+ * Short one line description of routine.\r
+ * <p>\r
+ * The sort algorithm is a N * log(N) algorithm. The following numbers\r
+ * on a PII, 400 MHZ machine, jdk117 with jit, insane.zip. This test\r
+ * is a simple "select * from table order by first_int_column. I then\r
+ * subtracted the time it takes to do "select * from table" from the\r
+ * result.\r
+ *\r
+ * number of rows elaspsed time in seconds\r
+ * -------------- -----------------------------\r
+ * 1000 0.20\r
+ * 10000 10.5\r
+ * 100000 80.0\r
+ *\r
+ * We assume that the formula for sort performance is of the form:\r
+ * performance = K * N * log(N). Solving the equation for the 1000\r
+ * and 100000 case we come up with:\r
+ *\r
+ * performance = 1 + 0.08 N ln(n)\r
+ *\r
+ * NOTE: Apparently, these measurements were done on a faster machine\r
+ * than was used for other performance measurements used by the optimizer.\r
+ * Experiments show that the 0.8 multiplier is off by a factor of 4\r
+ * with respect to other measurements (such as the time it takes to\r
+ * scan a conglomerate). I am correcting the formula to use 0.32\r
+ * rather than 0.08.\r
+ *\r
+ * - Jeff\r
+ *\r
+ * <p>\r
+ * RESOLVE (mikem) - this formula is very crude at the moment and will be\r
+ * refined later. known problems:\r
+ * 1) internal vs. external sort - we know that the performance of sort\r
+ * is discontinuous when we go from an internal to an external sort.\r
+ * A better model is probably a different set of contants for internal\r
+ * vs. external sort and some way to guess when this is going to happen.\r
+ * 2) current row size is never considered but is critical to performance.\r
+ * 3) estimatedExportRows is not used. This is a critical number to know\r
+ * if an internal vs. an external sort will happen. \r
+ *\r
+ * <p>\r
+ *\r
+ * @return The identifier to be used to open the conglomerate later.\r
+ *\r
+ *\r
+ * @exception StandardException Standard exception policy.\r
+ **/\r
+ public double getSortCost(\r
+ DataValueDescriptor[] template,\r
+ ColumnOrdering columnOrdering[],\r
+ boolean alreadyInOrder,\r
+ long estimatedInputRows,\r
+ long estimatedExportRows,\r
+ int estimatedRowSize)\r
+ throws StandardException\r
+ {\r
+ /* Avoid taking the log of 0 */\r
+ if (estimatedInputRows == 0)\r
+ return 0.0;\r
+\r
+ // RESOLVE - come up with some real benchmark. For now the cost\r
+ // of sort is 3 times the cost of scanning the data.\r
+\r
+ if (SanityManager.DEBUG)\r
+ {\r
+ SanityManager.ASSERT(estimatedInputRows >= 0);\r
+ SanityManager.ASSERT(estimatedExportRows >= 0);\r
+ }\r
+\r
+ double ret_val = \r
+ 1 + \r
+ ((0.32) * (estimatedInputRows) * Math.log(estimatedInputRows));\r
+\r
+ return(ret_val);\r
+ }\r
+\r
+ /*\r
+ ** Methods of ModuleControl.\r
+ */\r
+\r
+ public boolean canSupport(Properties startParams) {\r
+\r
+ if (startParams == null)\r
+ return false; \r
+\r
+ String impl = startParams.getProperty("derby.access.Conglomerate.type");\r
+ if (impl == null)\r
+ return false;\r
+\r
+ return supportsImplementation(impl);\r
+ }\r
+\r
+\r
+ public void boot(boolean create, Properties startParams)\r
+ throws StandardException\r
+ {\r
+ // Find the UUID factory.\r
+ UUIDFactory uuidFactory = Monitor.getMonitor().getUUIDFactory();\r
+\r
+ // Make a UUID that identifies this sort's format.\r
+ formatUUID = uuidFactory.recreateUUID(FORMATUUIDSTRING);\r
+\r
+ // See if there's a new maximum sort buffer size.\r
+ defaultSortBufferMax = PropertyUtil.getSystemInt("derby.storage.sortBufferMax",\r
+ 0, Integer.MAX_VALUE, 0);\r
+\r
+ // if defaultSortBufferMax is 0, the user did not specify\r
+ // sortBufferMax, then just set it to DEFAULT_SORTBUFFERMAX.\r
+ // if defaultSortBufferMax is not 0, the user specified sortBufferMax,\r
+ // do not override it.\r
+ if (defaultSortBufferMax == 0)\r
+ {\r
+ userSpecified = false;\r
+ defaultSortBufferMax = DEFAULT_SORTBUFFERMAX;\r
+ }\r
+ else\r
+ {\r
+ userSpecified = true;\r
+ if (defaultSortBufferMax < MINIMUM_SORTBUFFERMAX)\r
+ defaultSortBufferMax = MINIMUM_SORTBUFFERMAX;\r
+ }\r
+\r
+ }\r
+\r
+ public void stop()\r
+ {\r
+ }\r
+\r
+}\r