MWL#90: Non-merged semi-joins

- Take into account that grouping or aggregates decrease join output cardinality. (First code, can't make use of index statistics yet)
2026-05-16 20:07:13 +02:00 · 2011-05-09 10:35:55 +01:00 · 2011-05-09 10:35:55 +01:00 · 524e6aad73
commit 524e6aad73
parent ea43df7685
5 changed files with 220 additions and 20 deletions
--- a/sql/item_subselect.cc
+++ b/sql/item_subselect.cc
@ -475,10 +475,10 @@ bool Item_subselect::exec()
  return (res);
 }

-int Item_subselect::optimize()
+int Item_subselect::optimize(double *out_rows, double *cost)
 {
  int res;
-  res= engine->optimize();
+  res= engine->optimize(out_rows, cost); /* delegate: the engine is expected to fill *out_rows and *cost (see subselect_hash_sj_engine::optimize) */
  return res;
 }

@ -4085,16 +4085,218 @@ void subselect_hash_sj_engine::cleanup()
  result->cleanup(); /* Resets the temp table as well. */
 }

+JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const);
+JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab);

-int subselect_hash_sj_engine::optimize()
+/*
+  Get the fanout produced by the tables specified in tset together with the
+  tables they (transitively) depend on.
+
+  @param join  Join whose plan is examined
+  @param tset  Bitmap of the tables of interest
+
+  @detail
+    Step 1 collects into checked_deps the tables of tset plus every table
+    reachable from them through JOIN_TAB::ref.depend_map.
+    Step 2 walks the top-level join tabs and multiplies records_read of
+    those that belong to checked_deps. A tab contributes a factor of 1 when
+    it has no TABLE attached, is inside a semi-join nest (emb_sj_nest is
+    set) or has records_read == 0.
+
+  @return  Product of the per-table fanouts (1 if no tab qualifies)
+*/
+
+double get_fanout_with_deps(JOIN *join, table_map tset)
+{
+  /* First, recursively get all tables we depend on */
+  table_map deps_to_check= tset;
+  table_map checked_deps= 0;
+  table_map further_deps;
+  do
+  {
+    further_deps= 0;
+    Table_map_iterator tm_it(deps_to_check);
+    int tableno;
+    while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+    {
+      /* get tableno's dependency tables that are not in checked_deps yet */
+      further_deps |= join->map2table[tableno]->ref.depend_map & ~checked_deps;
+    }
+
+    checked_deps |= deps_to_check;
+    deps_to_check= further_deps;
+  } while (further_deps != 0);
+
+
+  /* Now, walk the join order and calculate the fanout */
+  double fanout= 1;
+  for (JOIN_TAB *tab= first_top_level_tab(join, WITHOUT_CONST_TABLES); tab;
+       tab= next_top_level_tab(join, tab))
+  {
+    /*
+      Only tables collected in checked_deps may contribute: multiplying in
+      every tab would make the result independent of tset. Tabs without a
+      TABLE object are skipped before tab->table is dereferenced.
+    */
+    if (tab->table && (tab->table->map & checked_deps) &&
+        !tab->emb_sj_nest && tab->records_read)
+      fanout *= rows2double(tab->records_read);
+  }
+  return fanout;
+}
+
+
+#if 0
+/*
+  DRAFT, compiled out: meant to bring down the post-GROUP BY row estimate by
+  using index statistics of the columns named in the GROUP BY list.
+
+  @param join  Join whose group_list is inspected
+
+  The cardinality computation itself is still pseudo-code (see the comment
+  in the innermost loop), so the function computes nothing useful yet.
+  NOTE(review): key_map::iterator is used as written in the draft; confirm
+  that such an iterator type exists before enabling this code.
+*/
+void check_out_index_stats(JOIN *join)
+{
+  ORDER *order;
+
+  /*
+    First, collect the keys that we can use in each table.
+    We can use a key if 
+    - all tables refer to it.
+  */
+  key_map key_start_use[MAX_TABLES];
+  key_map key_infix_use[MAX_TABLES];
+  table_map key_used= 0;      /* tables with a GROUP BY column that is in a key */
+  table_map non_key_used= 0;  /* tables used by non-field GROUP BY items */
+
+  bzero(&key_start_use, sizeof(key_start_use)); //psergey-todo: safe initialization!
+  bzero(&key_infix_use, sizeof(key_infix_use));
+
+  for (order= join->group_list; order; order= order->next)
+  {
+    Item *item= order->item[0];
+
+    if (item->real_type() == Item::FIELD_ITEM)
+    {
+      if (item->used_tables() & OUTER_REF_TABLE_BIT)
+        continue; /* outside references are like constants for us */
+
+      Field *field= ((Item_field*)item->real_item())->field;
+      uint table_no= field->table->tablenr;
+      const table_map table_bit= table_map(1) << table_no;
+      /* Bitwise test of this table's bit (a logical && is true for any table) */
+      if (!(non_key_used & table_bit) &&
+          !field->part_of_key.is_clear_all())
+      {
+        key_map infix_map= field->part_of_key;
+        infix_map.subtract(field->key_start);
+        key_start_use[table_no].merge(field->key_start);
+        key_infix_use[table_no].merge(infix_map);
+        /* key_used is a table bitmap: record the table's bit, not its number */
+        key_used |= table_bit;
+      }
+      continue;
+    }
+    /* 
+      Note: the below will cause clauses like GROUP BY YEAR(date) not to be
+      handled. 
+    */
+    non_key_used |= item->used_tables();
+  }
+
+  Table_map_iterator tm_it(key_used & ~non_key_used);
+  int tableno;
+  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+  {
+    /* Walk the keys of this particular table with the key iterator */
+    key_map::iterator key_it(key_start_use[tableno]);
+    int keyno;
+    while ((keyno = key_it.next_bit()) != key_map::iterator::BITMAP_END)
+    {
+      for (order= join->group_list; order; order= order->next)
+      {
+        Item *item= order->item[0];
+        if (item->used_tables() & (table_map(1) << tableno))
+        {
+          DBUG_ASSERT(item->real_type() == Item::FIELD_ITEM);
+        }
+      }
+      /*
+      if (continuation)
+      {
+        walk through list and find which key parts are occupied;
+        // note that the above can't be made any faster.
+      }
+      else
+        use rec_per_key[0];
+      
+      find out the cardinality.
+      check if cardinality decreases if we use it;
+      */
+    }
+  }
+}
+#endif
+
+
+/*
+  Estimate the number of rows a join produces after its GROUP BY is applied.
+
+  @param join  Join whose group_list is examined
+
+  @return
+    The value of get_fanout_with_deps() for the set of tables referenced by
+    the GROUP BY items, or HA_POS_ERROR (converted to double) as soon as a
+    GROUP BY item has RAND_TABLE_BIT set in used_tables().
+*/
+double get_post_group_estimate(JOIN* join)
+{
+  table_map tables_in_group_list= table_map(0);
+
+  /* Find out which tables are used in GROUP BY list */
+  for (ORDER *order= join->group_list; order; order= order->next)
+  {
+    const table_map item_tables= order->item[0]->used_tables();
+    if (item_tables & RAND_TABLE_BIT)
+      return HA_POS_ERROR; // TODO: change to join-output-estimate
+
+    tables_in_group_list|= item_tables;
+  }
+  tables_in_group_list &= ~PSEUDO_TABLE_BITS;
+
+  /*
+    Use join fanouts to calculate the max. number of records in the group-list
+  */
+  double fanout_rows[MAX_TABLES];  /* indexed by table number, not by key number */
+  bzero(&fanout_rows, sizeof(fanout_rows));
+  double out_rows;
+
+  out_rows= get_fanout_with_deps(join, tables_in_group_list);
+
+  /* 
+    Also generate max. number of records for each of the tables mentioned 
+    in the group-list. We'll use that as a baseline number that we'll try to 
+    reduce by using
+     - #table-records 
+     - index statistics.
+    The per-table numbers are not consumed yet.
+  */
+  Table_map_iterator tm_it(tables_in_group_list);
+  int tableno;
+  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
+  {
+    fanout_rows[tableno]= get_fanout_with_deps(join, table_map(1) << tableno);
+  }
+
+  /*
+    Try to bring down estimates using index statistics.
+  */
+  //check_out_index_stats(join);
+  return out_rows;
+}
+
+
+/*
+  Optimize the join to be materialized and estimate its output.
+
+  @param out_rows  OUT  Estimated number of rows the join produces
+  @param cost      OUT  Estimated cost of executing the join
+
+  @return  Result of JOIN::optimize() for the materialized join. When it is
+           non-zero no estimates are computed and both OUT values are set
+           to 0, so that callers never read indeterminate numbers.
+*/
+int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
 {
  int res;
+  DBUG_ENTER("subselect_hash_sj_engine::optimize");
  SELECT_LEX *save_select= thd->lex->current_select;
-  thd->lex->current_select= materialize_join->select_lex;
-  res= materialize_join->optimize();
-  thd->lex->current_select= save_select;
+  JOIN *join= materialize_join;

-  return res;
+  thd->lex->current_select= join->select_lex;
+  res= join->optimize();
+  if (res)
+  {
+    /* No usable plan: don't run cost estimation on a half-built join */
+    thd->lex->current_select= save_select;
+    *out_rows= 0;
+    *cost= 0;
+    DBUG_RETURN(res);
+  }
+
+  /* Calculate #rows and cost of join execution */
+  get_partial_join_cost(join, join->table_count - join->const_tables, 
+                        cost, out_rows);
+
+  /*
+    Adjust join output cardinality. There can be these cases:
+    - Have no GROUP BY and no aggregate funcs: we won't get into this 
+      function because such join will be processed as a merged semi-join 
+      (TODO: does it really mean we don't need to handle such cases here at 
+       all? put ASSERT)
+    - Have no GROUP BY but have aggregate funcs: output is 1 record.
+    - Have GROUP BY and have (or not) aggregate funcs:  need to adjust output 
+      cardinality.
+  */
+  thd->lex->current_select= save_select;
+  if (!join->group_list && !join->group_optimized_away &&
+      join->tmp_table_param.sum_func_count)
+  {
+    DBUG_PRINT("info",("Materialized join will have only 1 row (has "
+                       "aggregates but not GROUP BY)"));
+    *out_rows= 1;
+  }
+
+  /* Now with grouping */
+  if (join->group_list)
+  {
+    DBUG_PRINT("info",("Materialized join has grouping, trying to estimate"));
+    double output_rows= get_post_group_estimate(join);
+    DBUG_PRINT("info",("Got value of %g", output_rows));
+    *out_rows= output_rows;
+  }
+
+  DBUG_RETURN(res);
 }

 /**
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@ -147,7 +147,7 @@ public:
  bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item);
  void fix_after_pullout(st_select_lex *new_parent, Item **ref);
  void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
-  virtual int optimize();
+  virtual int optimize(double *out_rows, double *cost);
  virtual bool exec();
  virtual void fix_length_and_dec();
  table_map used_tables() const;
@ -534,7 +534,7 @@ public:
  THD * get_thd() { return thd; }
  virtual int prepare()= 0;
  virtual void fix_length_and_dec(Item_cache** row)= 0;
-  virtual int optimize() { DBUG_ASSERT(0); return 0; }
+  virtual int optimize(double *out_rows, double *cost) { DBUG_ASSERT(0); return 0; }
  /*
    Execute the engine

@ -804,7 +804,7 @@ public:
  bool init_runtime();
  void cleanup();
  int prepare() { return 0; } /* Override virtual function in base class. */
-  int optimize();
+  int optimize(double *out_rows, double *cost);
  int exec();
  virtual void print(String *str, enum_query_type query_type);
  uint cols()
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@ -825,21 +825,16 @@ void get_delayed_table_estimates(TABLE *table,
                                 double *startup_cost)
 {
  Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect;
-  item->optimize();
+  double rows;
+  double read_time;
+
+  item->optimize(&rows, &read_time);

  DBUG_ASSERT(item->engine->engine_type() ==
              subselect_engine::HASH_SJ_ENGINE);

  subselect_hash_sj_engine *hash_sj_engine=
    ((subselect_hash_sj_engine*)item->engine);
-  JOIN *join= hash_sj_engine->materialize_join;
-
-  double rows;
-  double read_time;
-
-  /* Calculate #rows and cost of join execution */
-  get_partial_join_cost(join, join->table_count - join->const_tables, 
-                        &read_time, &rows);

  *out_rows= (ha_rows)rows;
  *startup_cost= read_time;
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -6489,7 +6489,7 @@ JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const
 JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
 {
  tab= next_breadth_first_tab(join, tab);
-  if (tab->bush_root_tab)
+  if (tab && tab->bush_root_tab) /* next_breadth_first_tab() may return NULL (presumably at the end of the walk); don't dereference it */
    tab= NULL;
  return tab;
 }
@ -9262,6 +9262,8 @@ void JOIN::cleanup(bool full)
  SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
  SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
  @endcode
+
+  TODO: this function checks ORDER::used, which can only have a value of 0.
 */

 static bool
--- a/sql/table.h
+++ b/sql/table.h
@ -66,7 +66,8 @@ typedef struct st_order {
  bool   counter_used;                  /* parameter was counter of columns */
  Field  *field;			/* If tmp-table group */
  char	 *buff;				/* If tmp-table group */
-  table_map used, depend_map;
+  table_map used; /* NOTE: this member is only ever set to 0 but is still read by eq_ref_table */
+  table_map depend_map;
 } ORDER;

 /**