mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
CSC#4385: slow sorting for large UTF8 tables:
my_strnxfrm_utf8 now requires 2 bytes per character in the filesort key, instead of 3 bytes per character. Shorter filesort keys make sorting faster.
This commit is contained in:
parent
e8ae338356
commit
9bb464487d
20 changed files with 58 additions and 16 deletions
|
@ -110,6 +110,7 @@ typedef struct my_collation_handler_st
|
|||
my_bool diff_if_only_endspace_difference);
|
||||
int (*strnxfrm)(struct charset_info_st *,
|
||||
uchar *, uint, const uchar *, uint);
|
||||
uint (*strnxfrmlen)(struct charset_info_st *, uint);
|
||||
my_bool (*like_range)(struct charset_info_st *,
|
||||
const char *s, uint s_length,
|
||||
pchar w_prefix, pchar w_one, pchar w_many,
|
||||
|
@ -259,7 +260,8 @@ extern CHARSET_INFO my_charset_cp1250_czech_ci;
|
|||
|
||||
/* declarations for simple charsets */
|
||||
extern int my_strnxfrm_simple(CHARSET_INFO *, uchar *, uint, const uchar *,
|
||||
uint);
|
||||
uint);
|
||||
uint my_strnxfrmlen_simple(CHARSET_INFO *, uint);
|
||||
extern int my_strnncoll_simple(CHARSET_INFO *, const uchar *, uint,
|
||||
const uchar *, uint, my_bool);
|
||||
|
||||
|
|
|
@ -1187,7 +1187,7 @@ sortlength(SORT_FIELD *sortorder, uint s_length, bool *multi_byte_charset)
|
|||
{
|
||||
sortorder->need_strxnfrm= 1;
|
||||
*multi_byte_charset= 1;
|
||||
sortorder->length= sortorder->length*cs->strxfrm_multiply;
|
||||
sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
|
||||
}
|
||||
}
|
||||
if (sortorder->field->maybe_null())
|
||||
|
@ -1200,7 +1200,7 @@ sortlength(SORT_FIELD *sortorder, uint s_length, bool *multi_byte_charset)
|
|||
sortorder->length=sortorder->item->max_length;
|
||||
if (use_strnxfrm((cs=sortorder->item->collation.collation)))
|
||||
{
|
||||
sortorder->length= sortorder->length*cs->strxfrm_multiply;
|
||||
sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
|
||||
sortorder->need_strxnfrm= 1;
|
||||
*multi_byte_charset= 1;
|
||||
}
|
||||
|
|
|
@ -6293,6 +6293,7 @@ static MY_COLLATION_HANDLER my_collation_big5_chinese_ci_handler =
|
|||
my_strnncoll_big5,
|
||||
my_strnncollsp_big5,
|
||||
my_strnxfrm_big5,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_big5,
|
||||
my_wildcmp_mb,
|
||||
my_strcasecmp_mb,
|
||||
|
|
|
@ -447,6 +447,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
|
|||
my_strnncoll_8bit_bin,
|
||||
my_strnncollsp_8bit_bin,
|
||||
my_strnxfrm_8bit_bin,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,
|
||||
my_wildcmp_bin,
|
||||
my_strcasecmp_bin,
|
||||
|
@ -461,6 +462,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
|
|||
my_strnncoll_binary,
|
||||
my_strnncollsp_binary,
|
||||
my_strnxfrm_bin,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,
|
||||
my_wildcmp_bin,
|
||||
my_strcasecmp_bin,
|
||||
|
|
|
@ -5454,6 +5454,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_cp932,
|
||||
my_strnncollsp_cp932,
|
||||
my_strnxfrm_cp932,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_cp932,
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_strcasecmp_8bit,
|
||||
|
|
|
@ -593,6 +593,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
|
|||
my_strnncoll_czech,
|
||||
my_strnncollsp_czech,
|
||||
my_strnxfrm_czech,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_czech,
|
||||
my_wildcmp_8bit,
|
||||
my_strcasecmp_8bit,
|
||||
|
|
|
@ -8641,6 +8641,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_simple, /* strnncoll */
|
||||
my_strnncollsp_simple,
|
||||
my_strnxfrm_simple, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple, /* like_range */
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_strcasecmp_mb,
|
||||
|
|
|
@ -8636,6 +8636,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_simple,/* strnncoll */
|
||||
my_strnncollsp_simple,
|
||||
my_strnxfrm_simple, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,/* like_range */
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_strcasecmp_mb,
|
||||
|
|
|
@ -5692,6 +5692,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_simple, /* strnncoll */
|
||||
my_strnncollsp_simple,
|
||||
my_strnxfrm_simple, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple, /* like_range */
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_strcasecmp_mb, /* instr */
|
||||
|
|
|
@ -9939,6 +9939,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_gbk,
|
||||
my_strnncollsp_gbk,
|
||||
my_strnxfrm_gbk,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_gbk,
|
||||
my_wildcmp_mb,
|
||||
my_strcasecmp_mb,
|
||||
|
|
|
@ -693,6 +693,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
|
|||
my_strnncoll_latin1_de,
|
||||
my_strnncollsp_latin1_de,
|
||||
my_strnxfrm_latin1_de,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,
|
||||
my_wildcmp_8bit,
|
||||
my_strcasecmp_8bit,
|
||||
|
|
|
@ -912,6 +912,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
|
|||
my_strnncoll_mb_bin,
|
||||
my_strnncollsp_mb_bin,
|
||||
my_strnxfrm_mb_bin,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,
|
||||
my_wildcmp_mb_bin,
|
||||
my_strcasecmp_mb_bin,
|
||||
|
|
|
@ -21,6 +21,15 @@
|
|||
|
||||
#include "stdarg.h"
|
||||
|
||||
/*
|
||||
Returns the number of bytes required for strnxfrm().
|
||||
*/
|
||||
uint my_strnxfrmlen_simple(CHARSET_INFO *cs, uint len)
|
||||
{
|
||||
return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Converts a string into its sort key.
|
||||
|
||||
|
@ -1365,6 +1374,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
|
|||
my_strnncoll_simple,
|
||||
my_strnncollsp_simple,
|
||||
my_strnxfrm_simple,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,
|
||||
my_wildcmp_8bit,
|
||||
my_strcasecmp_8bit,
|
||||
|
|
|
@ -4627,6 +4627,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_sjis,
|
||||
my_strnncollsp_sjis,
|
||||
my_strnxfrm_sjis,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_sjis,
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_strcasecmp_8bit,
|
||||
|
|
|
@ -927,6 +927,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_tis620,
|
||||
my_strnncollsp_tis620,
|
||||
my_strnxfrm_tis620,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_tis620,
|
||||
my_wildcmp_8bit, /* wildcmp */
|
||||
my_strcasecmp_8bit,
|
||||
|
|
|
@ -8024,6 +8024,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
|
|||
my_strnncoll_ucs2_uca,
|
||||
my_strnncollsp_ucs2_uca,
|
||||
my_strnxfrm_ucs2_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
|
@ -8504,6 +8505,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
|
|||
my_strnncoll_any_uca,
|
||||
my_strnncollsp_any_uca,
|
||||
my_strnxfrm_any_uca,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_mb,
|
||||
my_wildcmp_uca,
|
||||
NULL,
|
||||
|
|
|
@ -1499,6 +1499,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
|
|||
my_strnncoll_ucs2,
|
||||
my_strnncollsp_ucs2,
|
||||
my_strnxfrm_ucs2,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_ucs2,
|
||||
my_wildcmp_ucs2_ci,
|
||||
my_strcasecmp_ucs2,
|
||||
|
@ -1513,6 +1514,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
|
|||
my_strnncoll_ucs2_bin,
|
||||
my_strnncollsp_ucs2_bin,
|
||||
my_strnxfrm_ucs2_bin,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,
|
||||
my_wildcmp_ucs2_bin,
|
||||
my_strcasecmp_ucs2_bin,
|
||||
|
|
|
@ -8501,6 +8501,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_simple,/* strnncoll */
|
||||
my_strnncollsp_simple,
|
||||
my_strnxfrm_simple, /* strnxfrm */
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_simple,/* like_range */
|
||||
my_wildcmp_mb, /* wildcmp */
|
||||
my_strcasecmp_mb,
|
||||
|
|
|
@ -2238,6 +2238,12 @@ int my_wildcmp_utf8(CHARSET_INFO *cs,
|
|||
}
|
||||
|
||||
|
||||
static
|
||||
uint my_strnxfrmlen_utf8(CHARSET_INFO *cs __attribute__((unused)), uint len)
|
||||
{
|
||||
return (len * 2 + 2) / 3;
|
||||
}
|
||||
|
||||
static int my_strnxfrm_utf8(CHARSET_INFO *cs,
|
||||
uchar *dst, uint dstlen,
|
||||
const uchar *src, uint srclen)
|
||||
|
@ -2245,29 +2251,33 @@ static int my_strnxfrm_utf8(CHARSET_INFO *cs,
|
|||
my_wc_t wc;
|
||||
int res;
|
||||
int plane;
|
||||
uchar *de = dst + dstlen;
|
||||
uchar *de= dst + dstlen;
|
||||
uchar *de_beg= de - 1;
|
||||
const uchar *se = src + srclen;
|
||||
|
||||
while( src < se && dst < de )
|
||||
while (dst < de_beg)
|
||||
{
|
||||
if ((res=my_utf8_uni(cs,&wc, src, se))<0)
|
||||
{
|
||||
if ((res=my_utf8_uni(cs,&wc, src, se)) <= 0)
|
||||
break;
|
||||
}
|
||||
src+=res;
|
||||
srclen-=res;
|
||||
|
||||
plane=(wc>>8) & 0xFF;
|
||||
wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
|
||||
|
||||
if ((res=my_uni_utf8(cs,wc,dst,de)) <0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
dst+=res;
|
||||
*dst++= wc >> 8;
|
||||
*dst++= wc & 0xFF;
|
||||
|
||||
}
|
||||
if (dst < de)
|
||||
bfill(dst, de - dst, ' ');
|
||||
|
||||
while (dst < de_beg) /* Fill the tail with keys for space character */
|
||||
{
|
||||
*dst++= 0x00;
|
||||
*dst++= 0x20;
|
||||
}
|
||||
|
||||
if (dst < de) /* Clear the last byte, if "dstlen" was an odd number */
|
||||
*de= 0x00;
|
||||
|
||||
return dstlen;
|
||||
}
|
||||
|
||||
|
@ -2306,6 +2316,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
|
|||
my_strnncoll_utf8,
|
||||
my_strnncollsp_utf8,
|
||||
my_strnxfrm_utf8,
|
||||
my_strnxfrmlen_utf8,
|
||||
my_like_range_mb,
|
||||
my_wildcmp_utf8,
|
||||
my_strcasecmp_utf8,
|
||||
|
|
|
@ -626,6 +626,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
|
|||
my_strnncoll_win1250ch,
|
||||
my_strnncollsp_win1250ch,
|
||||
my_strnxfrm_win1250ch,
|
||||
my_strnxfrmlen_simple,
|
||||
my_like_range_win1250ch,
|
||||
my_wildcmp_8bit,
|
||||
my_strcasecmp_8bit,
|
||||
|
|
Loading…
Reference in a new issue