mirror of
				https://github.com/MariaDB/server.git
				synced 2025-10-25 17:08:14 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			158 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			158 lines
		
	
	
	
		
			4.8 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* Copyright (c) 2024, MariaDB
 | |
| 
 | |
|    This program is free software; you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation; version 2 of the License.
 | |
| 
 | |
|    This program is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with this program; if not, write to the Free Software
 | |
|    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
 | |
| 
 | |
| /**
 | |
|   @file
 | |
| 
 | |
|   @brief
 | |
|   This file contains SargableLeft optimization
 | |
| */
 | |
| 
 | |
| #include "mariadb.h"
 | |
| #include "sql_priv.h"
 | |
| #include <m_ctype.h>
 | |
| #include "sql_select.h"
 | |
| 
 | |
| /*
 | |
|   SargableLeft
 | |
|   ============
 | |
| 
 | |
|   This optimization makes conditions in forms like
 | |
| 
 | |
|     LEFT(key_col, N) = 'string_const'
 | |
|     SUBSTRING(key_col, 1, N) = 'string_const'
 | |
| 
 | |
|   sargable. The conditions take the first N characters of key_col and
 | |
|   compare them with a string constant.
 | |
|   However, producing index lookup intervals for this collation is complex
 | |
|   due to contractions.
 | |
| 
 | |
|   Contractions
 | |
|   ------------
 | |
|   A contraction is a property of collation where a sequence of multiple
 | |
|   characters is compared as some other character(s).
 | |
|   For example, in utfmb4_danish_ci, 'AA' is compared as one character 'Å'
 | |
|   which sorts after 'Z':
 | |
| 
 | |
|   MariaDB [test]> select a from t1 order by col1;
 | |
|   +------+
 | |
|   | col1 |
 | |
|   +------+
 | |
|   | BA1  | (1)
 | |
|   | BC   |
 | |
|   | BZ   |
 | |
|   | BAA2 | (2)
 | |
|   +------+
 | |
| 
 | |
|   Now suppose we're producing lookup ranges for condition
 | |
| 
 | |
|   LEFT(col1, 2)='BA'
 | |
| 
 | |
|   In addition to looking near 'BA' (1), we need to look into the area right
 | |
|   after 'BZ' (2), where we may find 'BAA'.
 | |
| 
 | |
|   Fortunately, this was already implemented for handling LIKE conditions in
 | |
|   form 'key_col LIKE 'BA%'. Each collation provides like_range() call which
 | |
|   produces lookup range in a collation-aware way.
 | |
| 
 | |
|   Differences between LIKE and LEFT=
 | |
|   ----------------------------------
 | |
|   So can one reduce or even rewrite conditions with LEFT() into LIKE? No, there
 | |
|   are differences.
 | |
| 
 | |
|   First, LIKE does character-by-character comparison, ignoring the collation's
 | |
|   contractions:
 | |
| 
 | |
|   MariaDB [test]> select col1, col1='AA', col1 LIKE 'AA' from t1;
 | |
|   +------+-----------+----------------+
 | |
|   | col1 | col1='AA' | col1 LIKE 'AA' |
 | |
|   +------+-----------+----------------+
 | |
|   | AA   |         1 |              1 |
 | |
|   | Å    |         1 |              0 |
 | |
|   +------+-----------+----------------+
 | |
| 
 | |
|   (However, index comparison function uses equality's comparison rules.
 | |
|   my_like_range() will produce an index range 'AA' <= col1 <= 'AA'. Reading rows
 | |
|   from it will return 'Å' as well)
 | |
| 
 | |
|   Second, LEFT imposes additional constraints on the length of both parts. For
 | |
|   example:
 | |
|   - LEFT(col,2)='string-longer-than-two-chars' - is false for any value of col.
 | |
|   - LEFT(col,2)='A' is not equivalent to (col LIKE 'A%'), consider col='Ab'.
 | |
| 
 | |
|   Take-aways
 | |
|   ----------
 | |
|   - SargableLeft makes use of my_like_range() to produce index intervals.
 | |
|   - LEFT(col, N)='foo'
 | |
|   - We ignore the value of N when producing the lookup range (this may make the
 | |
|     range to include rows for which the predicate is false)
 | |
|     = For the SUBSTRING form, we only need to check that M=1 in the
 | |
|       SUBSTRING(col, M, N)='foo'.
 | |
| */
 | |
| 
 | |
| 
 | |
| /*
 | |
|   @brief Check if this condition is sargable LEFT(key_col, N)='foo', or
 | |
|          similar condition with SUBSTRING().
 | |
| 
 | |
|   @detail
 | |
|     'foo' here can be any constant we can compute during optimization,
 | |
|     Only equality conditions are supported.
 | |
|     See SargableLeft above for details.
 | |
| 
 | |
|   @param  field      The first argument of LEFT or SUBSTRING if sargable,
 | |
|                      otherwise dereferenced to NULL
 | |
|   @param  value_idx  The index of argument that is the prefix string
 | |
|                      if sargable, otherwise dereferenced to -1
 | |
| */
 | |
| 
 | |
| bool Item_bool_func::with_sargable_substr(Item_field **field, int *value_idx) const
 | |
| {
 | |
|   int func_idx, val_idx= -1;
 | |
|   Item **func_args, *real= NULL;
 | |
|   bool ret= false;
 | |
|   enum Functype type;
 | |
|   if (functype() != EQ_FUNC)
 | |
|     goto done;
 | |
|   if (args[0]->type() == FUNC_ITEM)
 | |
|     func_idx= 0;
 | |
|   else if (args[1]->type() == FUNC_ITEM)
 | |
|     func_idx= 1;
 | |
|   else
 | |
|     goto done;
 | |
|   type= ((Item_func *) args[func_idx])->functype();
 | |
|   if (type != SUBSTR_FUNC && type != LEFT_FUNC)
 | |
|     goto done;
 | |
|   func_args= ((Item_func *) args[func_idx])->arguments();
 | |
|   real= func_args[0]->real_item();
 | |
|   val_idx= 1 - func_idx;
 | |
|   if (real->type() == FIELD_ITEM &&
 | |
|       args[val_idx]->can_eval_in_optimize() &&
 | |
|       (type == LEFT_FUNC || func_args[1]->val_int() == 1))
 | |
|   {
 | |
|     ret= true;
 | |
|     goto done;
 | |
|   }
 | |
|   real= NULL;
 | |
|   val_idx= -1;
 | |
| done:
 | |
|   if (field != NULL)
 | |
|     *field= (Item_field *) real;
 | |
|   if (value_idx != NULL)
 | |
|     *value_idx= val_idx;
 | |
|   return ret;
 | |
| }
 | |
| 
 | |
| 
 | 
