Refactoring: Factor out common code from find_best() and best_access_path(): make

find_best() call best_access_path().
2025-01-31 02:51:44 +01:00 · 2006-05-07 18:07:08 +04:00 · 2006-05-07 18:07:08 +04:00 · d268cf6db1
commit d268cf6db1
parent f6c95ee81a
1 changed files with 5 additions and 338 deletions
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@ -4327,344 +4327,11 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
    if ((rest_tables & real_table_bit) && !(rest_tables & s->dependent) &&
        (!idx|| !check_interleaving_with_nj(join->positions[idx-1].table, s)))
    {
-      double best,best_time,records;
+      double records, best;
-      best=best_time=records=DBL_MAX;
+      best_access_path(join, s, thd, rest_tables, idx, record_count, 
-      KEYUSE *best_key=0;
+                       read_time);
-      uint best_max_key_part=0;
+      records= join->positions[idx].records_read;
-      my_bool found_constraint= 0;
+      best= join->positions[idx].read_time;
      if (s->keyuse)
      {						/* Use key if possible */
 	TABLE *table=s->table;
 	KEYUSE *keyuse,*start_key=0;
 	double best_records=DBL_MAX;
 	uint max_key_part=0;
 	/* Test how we can use keys */
 	rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE;  // Assumed records/key
 	for (keyuse=s->keyuse ; keyuse->table == table ;)
 	{
 	  key_part_map found_part=0;
 	  table_map found_ref=0;
 	  uint key=keyuse->key;
 	  KEY *keyinfo=table->key_info+key;
          bool ft_key=(keyuse->keypart == FT_KEYPART);
 	  uint found_ref_or_null= 0;
 	  /* Calculate how many key segments of the current key we can use */
 	  start_key=keyuse;
 	  do
 	  {
            uint keypart=keyuse->keypart;
            table_map best_part_found_ref= 0;
            double best_prev_record_reads= DBL_MAX;
 	    do
 	    {
 	      if (!(rest_tables & keyuse->used_tables) &&
 		  !(found_ref_or_null & keyuse->optimize))
 	      {
 		found_part|=keyuse->keypart_map;
                double tmp= prev_record_reads(join,
 					      (found_ref |
 					       keyuse->used_tables));
                if (tmp < best_prev_record_reads)
                {
                  best_part_found_ref= keyuse->used_tables;
                  best_prev_record_reads= tmp;
                }
 		if (rec > keyuse->ref_table_rows)
 		  rec= keyuse->ref_table_rows;
 		/*
 		  If there is one 'key_column IS NULL' expression, we can
 		  use this ref_or_null optimisation of this field
 		*/
 		found_ref_or_null|= (keyuse->optimize &
 				     KEY_OPTIMIZE_REF_OR_NULL);
              }
 	      keyuse++;
 	    } while (keyuse->table == table && keyuse->key == key &&
 		     keyuse->keypart == keypart);
 	    found_ref|= best_part_found_ref;
 	  } while (keyuse->table == table && keyuse->key == key);
 	  /*
 	    Assume that that each key matches a proportional part of table.
 	  */
          if (!found_part && !ft_key)
 	    continue;				// Nothing usable found
 	  if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
 	    rec= MATCHING_ROWS_IN_OTHER_TABLE;	// Fix for small tables
          /*
 	    ft-keys require special treatment
          */
          if (ft_key)
          {
            /*
 	      Really, there should be records=0.0 (yes!)
 	      but 1.0 would be probably safer
            */
            tmp=prev_record_reads(join,found_ref);
            records=1.0;
          }
          else
          {
 	  found_constraint= 1;
 	  /*
 	    Check if we found full key
 	  */
 	  if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
 	      !found_ref_or_null)
 	  {				/* use eq key */
 	    max_key_part= (uint) ~0;
 	    if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY |
 				   HA_END_SPACE_KEY)) == HA_NOSAME)
 	    {
 	      tmp=prev_record_reads(join,found_ref);
 	      records=1.0;
 	    }
 	    else
 	    {
 	      if (!found_ref)
 	      {					// We found a const key
 		if (table->quick_keys.is_set(key))
 		  records= (double) table->quick_rows[key];
 		else
 		{
 		  /* quick_range couldn't use key! */
 		  records= (double) s->records/rec;
 		}
 	      }
 	      else
 	      {
 		if (!(records=keyinfo->rec_per_key[keyinfo->key_parts-1]))
 		{				// Prefere longer keys
 		  records=
 		    ((double) s->records / (double) rec *
 		     (1.0 +
 		      ((double) (table->s->max_key_length-keyinfo->key_length) /
 		       (double) table->s->max_key_length)));
 		  if (records < 2.0)
 		    records=2.0;		// Can't be as good as a unique
 		}
 	      }
 	      /* Limit the number of matched rows */
 	      tmp= records;
 	      set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
 	      if (table->used_keys.is_set(key))
 	      {
 		/* we can use only index tree */
 		uint keys_per_block= table->file->block_size/2/
 		  (keyinfo->key_length+table->file->ref_length)+1;
 		tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
 	      }
 	      else
 		tmp=record_count*min(tmp,s->worst_seeks);
 	    }
 	  }
 	  else
 	  {
 	    /*
 	      Use as much key-parts as possible and a uniq key is better
 	      than a not unique key
 	      Set tmp to (previous record count) * (records / combination)
 	    */
 	    if ((found_part & 1) &&
 		(!(table->file->index_flags(key,0,0) & HA_ONLY_WHOLE_INDEX) ||
 		 found_part == PREV_BITS(uint,keyinfo->key_parts)))
 	    {
 	      max_key_part=max_part_bit(found_part);
 	      /*
 		Check if quick_range could determinate how many rows we
 		will match
 	      */
 	      if (table->quick_keys.is_set(key) &&
 		  table->quick_key_parts[key] == max_key_part)
 		tmp=records= (double) table->quick_rows[key];
 	      else
 	      {
 		/* Check if we have statistic about the distribution */
 		if ((records=keyinfo->rec_per_key[max_key_part-1]))
 		  tmp=records;
 		else
 		{
 		  /*
 		    Assume that the first key part matches 1% of the file
 		    and that the whole key matches 10 (duplicates) or 1
 		    (unique) records.
 		    Assume also that more key matches proportionally more
 		    records
 		    This gives the formula:
 		    records= (x * (b-a) + a*c-b)/(c-1)
 		    b = records matched by whole key
 		    a = records matched by first key part (10% of all records?)
 		    c = number of key parts in key
 		    x = used key parts (1 <= x <= c)
 		  */
 		  double rec_per_key;
                  rec_per_key= keyinfo->rec_per_key[keyinfo->key_parts-1] ?
 		    (double) keyinfo->rec_per_key[keyinfo->key_parts-1] :
 		    (double) s->records/rec+1;   
 		  if (!s->records)
 		    tmp=0;
 		  else if (rec_per_key/(double) s->records >= 0.01)
 		    tmp=rec_per_key;
 		  else
 		  {
 		    double a=s->records*0.01;
 		    tmp=(max_key_part * (rec_per_key - a) +
 			 a*keyinfo->key_parts - rec_per_key)/
 		      (keyinfo->key_parts-1);
 		    set_if_bigger(tmp,1.0);
 		  }
 		  records=(ulong) tmp;
 		}
 		/*
 		  If quick_select was used on a part of this key, we know
 		  the maximum number of rows that the key can match.
 		*/
 		if (table->quick_keys.is_set(key) &&
 		    table->quick_key_parts[key] <= max_key_part &&
 		    records > (double) table->quick_rows[key])
 		  tmp= records= (double) table->quick_rows[key];
 		else if (found_ref_or_null)
 		{
 		  /* We need to do two key searches to find key */
 		  tmp*= 2.0;
 		  records*= 2.0;
 		}
 	      }
 	      /* Limit the number of matched rows */
 	      set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
 	      if (table->used_keys.is_set(key))
 	      {
 		/* we can use only index tree */
 		uint keys_per_block= table->file->block_size/2/
 		  (keyinfo->key_length+table->file->ref_length)+1;
 		tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
 	      }
 	      else
 		tmp=record_count*min(tmp,s->worst_seeks);
 	    }
 	    else
 	      tmp=best_time;			// Do nothing
 	  }
          } /* not ft_key */
 	  if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
 	  {
 	    best_time=tmp + records/(double) TIME_FOR_COMPARE;
 	    best=tmp;
 	    best_records=records;
 	    best_key=start_key;
 	    best_max_key_part=max_key_part;
 	  }
 	}
 	records=best_records;
      }
      /*
 	Don't test table scan if it can't be better.
 	Prefer key lookup if we would use the same key for scanning.
 	Don't do a table scan on InnoDB tables, if we can read the used
 	parts of the row from any of the used index.
 	This is because table scans uses index and we would not win
 	anything by using a table scan.
        (see comment in best_access_path() for more details on the below
         condition)
      */
      if ((records >= s->found_records || best > s->read_time) &&
 	  !(s->quick && best_key && s->quick->index == best_key->key &&
 	    best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&
 	  !((s->table->file->table_flags() & HA_TABLE_SCAN_ON_INDEX) &&
 	    ! s->table->used_keys.is_clear_all() && best_key) &&
 	  !(s->table->force_index && best_key && !s->quick))
      {						// Check full join
        ha_rows rnd_records= s->found_records;
        /*
          If there is a restriction on the table, assume that 25% of the
          rows can be skipped on next part.
          This is to force tables that this table depends on before this
          table
        */
        if (found_constraint)
          rnd_records-= rnd_records/4;
        /*
          Range optimizer never proposes a RANGE if it isn't better
          than FULL: so if RANGE is present, it's always preferred to FULL.
          Here we estimate its cost.
        */
        if (s->quick)
        {
          /*
            For each record we:
             - read record range through 'quick'
             - skip rows which does not satisfy WHERE constraints
           */
          tmp= record_count *
               (s->quick->read_time +
               (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
        }
        else
        {
          /* Estimate cost of reading table. */
          tmp= s->table->file->scan_time();
          if (s->table->map  & join->outer_join)      // Can't use join cache
          {
            /*
              For each record we have to:
              - read the whole table record 
              - skip rows which does not satisfy join condition
            */
            tmp= record_count *
                 (tmp +     
                 (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
          }
          else
          {
            /* We read the table as many times as join buffer becomes full. */
            tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
                               record_count /
                               (double) thd->variables.join_buff_size));
            /* 
              We don't make full cartesian product between rows in the scanned
              table and existing records because we skip all rows from the
              scanned table, which does not satisfy join condition when 
              we read the table (see flush_cached_records for details). Here we
              take into account cost to read and skip these records.
            */
            tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
          }
        }
        /*
          We estimate the cost of evaluating WHERE clause for found records
          as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
          tmp give us total cost of using TABLE SCAN
        */
 	if (best == DBL_MAX ||
 	    (tmp  + record_count/(double) TIME_FOR_COMPARE*rnd_records <
 	     best + record_count/(double) TIME_FOR_COMPARE*records))
 	{
 	  /*
 	    If the table has a range (s->quick is set) make_join_select()
 	    will ensure that this will be used
 	  */
 	  best=tmp;
 	  records= rows2double(rnd_records);
 	  best_key=0;
 	}
      }
      join->positions[idx].records_read= records;
      join->positions[idx].key=best_key;
      join->positions[idx].table= s;
      if (!best_key && idx == join->const_tables &&
 	  s->table == join->sort_by_table &&
 	  join->unit->select_limit_cnt >= records)
 	join->sort_by_table= (TABLE*) 1;	// Must use temporary table
      /*
 	Go to the next level only if there hasn't been a better key on
 	this level! This will cut down the search for a lot simple cases!