Bug#48846: Too much time spent in ha_partition::records_in_range if not able to prune

Problem was that ha_partition::records_in_range called records_in_range for all non pruned partitions, even if an estimate should be given. Solution is to only use 1/3 of the partitions (up to 10) for records_in_range and estimate the total from this subset. (And continue until a non zero return value from the called partitions records_in_range is given, since 0 means no rows will match.) sql/ha_partition.cc: Bug#48846: Too much time spent in ha_partition::records_in_range if not able to prune estimate_rows_upper_bound and records_in_range are very similar (the only difference is the function and its parameters to use) so I created a common function for this. Since these calls from the optimizer are only estimates, it is not neccesary to call them for every partition, it can use a much smaller subset of the used partitions instead, which improves performance for selects. sql/ha_partition.h: Bug#48846: Too much time spent in ha_partition::records_in_range if not able to prune Added two private functions to help some optimizer calls.
2025-03-29 18:35:35 +01:00 · 2009-11-17 20:02:16 +01:00 · 2009-11-17 20:02:16 +01:00 · 8734933400
commit 8734933400
parent 552965eff9
2 changed files with 105 additions and 51 deletions
--- a/sql/ha_partition.cc
+++ b/sql/ha_partition.cc
@ -5747,6 +5747,23 @@ const key_map *ha_partition::keys_to_use_for_scanning()
  DBUG_RETURN(m_file[0]->keys_to_use_for_scanning());
 }

+#define MAX_PARTS_FOR_OPTIMIZER_CALLS 10
+/*
+  Prepare start variables for estimating optimizer costs.
+
+  @param[out] num_used_parts  Number of partitions after pruning.
+  @param[out] check_min_num   Number of partitions to call.
+  @param[out] first           first used partition.
+*/
+void ha_partition::partitions_optimizer_call_preparations(uint *first,
+                                                          uint *num_used_parts,
+                                                          uint *check_min_num)
+{
+  *first= bitmap_get_first_set(&(m_part_info->used_partitions));
+  *num_used_parts= bitmap_bits_set(&(m_part_info->used_partitions));
+  *check_min_num= min(MAX_PARTS_FOR_OPTIMIZER_CALLS, *num_used_parts);
+}
+

 /*
  Return time for a scan of the table
@ -5760,43 +5777,67 @@ const key_map *ha_partition::keys_to_use_for_scanning()

 double ha_partition::scan_time()
 {
-  double scan_time= 0;
-  handler **file;
+  double scan_time= 0.0;
+  uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
  DBUG_ENTER("ha_partition::scan_time");

-  for (file= m_file; *file; file++)
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-      scan_time+= (*file)->scan_time();
+  partitions_optimizer_call_preparations(&first, &num_used_parts, &check_min_num);
+  for (part_id= first; partitions_called < num_used_parts ; part_id++)
+  {
+    if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
+      continue;
+    scan_time+= m_file[part_id]->scan_time();
+    partitions_called++;
+    if (partitions_called >= check_min_num && scan_time != 0.0)
+    {
+      DBUG_RETURN(scan_time *
+                      (double) num_used_parts / (double) partitions_called);
+    }
+  }
  DBUG_RETURN(scan_time);
 }


 /*
-  Get time to read
+  Estimate rows for records_in_range or estimate_rows_upper_bound.

-  SYNOPSIS
-    read_time()
-    index                Index number used
-    ranges               Number of ranges
-    rows                 Number of rows
+  @param is_records_in_range  call records_in_range instead of
+                              estimate_rows_upper_bound.
+  @param inx                  (only for records_in_range) index to use.
+  @param min_key              (only for records_in_range) start of range.
+  @param max_key              (only for records_in_range) end of range.

-  RETURN VALUE
-    time for read
-
-  DESCRIPTION
-    This will be optimised later to include whether or not the index can
-    be used with partitioning. To achieve we need to add another parameter
-    that specifies how many of the index fields that are bound in the ranges.
-    Possibly added as a new call to handlers.
+  @return Number of rows or HA_POS_ERROR.
 */
-
-double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+ha_rows ha_partition::estimate_rows(bool is_records_in_range, uint inx,
+                                    key_range *min_key, key_range *max_key)
 {
-  DBUG_ENTER("ha_partition::read_time");
+  ha_rows rows, estimated_rows= 0;
+  uint first, part_id, num_used_parts, check_min_num, partitions_called= 0;
+  DBUG_ENTER("ha_partition::records_in_range");

-  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
+  partitions_optimizer_call_preparations(&first, &num_used_parts, &check_min_num);
+  for (part_id= first; partitions_called < num_used_parts ; part_id++)
+  {
+    if (!bitmap_is_set(&(m_part_info->used_partitions), part_id))
+      continue;
+    if (is_records_in_range)
+      rows= m_file[part_id]->records_in_range(inx, min_key, max_key);
+    else
+      rows= m_file[part_id]->estimate_rows_upper_bound();
+    if (rows == HA_POS_ERROR)
+      DBUG_RETURN(HA_POS_ERROR);
+    estimated_rows+= rows;
+    partitions_called++;
+    if (partitions_called >= check_min_num && estimated_rows)
+    {
+      DBUG_RETURN(estimated_rows * num_used_parts / partitions_called);
+    }
+  }
+  DBUG_RETURN(estimated_rows);
 }

+
 /*
  Find number of records in a range

@ -5824,22 +5865,9 @@ double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
 ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,
 				       key_range *max_key)
 {
-  handler **file;
-  ha_rows in_range= 0;
  DBUG_ENTER("ha_partition::records_in_range");

-  file= m_file;
-  do
-  {
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-    {
-      ha_rows tmp_in_range= (*file)->records_in_range(inx, min_key, max_key);
-      if (tmp_in_range == HA_POS_ERROR)
-        DBUG_RETURN(tmp_in_range);
-      in_range+= tmp_in_range;
-    }
-  } while (*(++file));
-  DBUG_RETURN(in_range);
+  DBUG_RETURN(estimate_rows(TRUE, inx, min_key, max_key));
 }


@ -5855,22 +5883,36 @@ ha_rows ha_partition::records_in_range(uint inx, key_range *min_key,

 ha_rows ha_partition::estimate_rows_upper_bound()
 {
-  ha_rows rows, tot_rows= 0;
-  handler **file;
  DBUG_ENTER("ha_partition::estimate_rows_upper_bound");

-  file= m_file;
-  do
-  {
-    if (bitmap_is_set(&(m_part_info->used_partitions), (file - m_file)))
-    {
-      rows= (*file)->estimate_rows_upper_bound();
-      if (rows == HA_POS_ERROR)
-        DBUG_RETURN(HA_POS_ERROR);
-      tot_rows+= rows;
-    }
-  } while (*(++file));
-  DBUG_RETURN(tot_rows);
+  DBUG_RETURN(estimate_rows(FALSE, 0, NULL, NULL));
+}
+
+
+/*
+  Get time to read
+
+  SYNOPSIS
+    read_time()
+    index                Index number used
+    ranges               Number of ranges
+    rows                 Number of rows
+
+  RETURN VALUE
+    time for read
+
+  DESCRIPTION
+    This will be optimised later to include whether or not the index can
+    be used with partitioning. To achieve we need to add another parameter
+    that specifies how many of the index fields that are bound in the ranges.
+    Possibly added as a new call to handlers.
+*/
+
+double ha_partition::read_time(uint index, uint ranges, ha_rows rows)
+{
+  DBUG_ENTER("ha_partition::read_time");
+
+  DBUG_RETURN(m_file[0]->read_time(index, ranges, rows));
 }


--- a/sql/ha_partition.h
+++ b/sql/ha_partition.h
@ -547,6 +547,18 @@ public:
     -------------------------------------------------------------------------
  */

+private:
+  /*
+    Helper function to get the minimum number of partitions to use for
+    the optimizer hints/cost calls.
+  */
+  void partitions_optimizer_call_preparations(uint *num_used_parts,
+                                              uint *check_min_num,
+                                              uint *first);
+  ha_rows estimate_rows(bool is_records_in_range, uint inx,
+                        key_range *min_key, key_range *max_key);
+public:
+
  /*
    keys_to_use_for_scanning can probably be implemented as the
    intersection of all underlying handlers if mixed handlers are used.