Move handling of suffix_length from strnxfrm_bin() to filesort to ensure proper sorting of all kind of binary objects

field::sort_key() now adds length last for varbinary/blob
VARBINARY/BLOB is now sorted by filesort so that shorter strings comes before longer ones
Fixed issues in test cases from last merge
This commit is contained in:
monty@mysql.com 2005-10-14 00:04:52 +03:00
parent 6f5baa1eba
commit 3da5f1c223
10 changed files with 238 additions and 77 deletions

View file

@ -2668,10 +2668,9 @@ a c
drop table t1;
CREATE TABLE t1 ( a INT NOT NULL, b INT NOT NULL, UNIQUE idx (a,b) );
INSERT INTO t1 VALUES (1,1),(1,2),(1,3),(1,4);
CREATE TABLE t2 ( a INT NOT NULL, b INT NOT NULL, c INT );
INSERT INTO t2 VALUES ( 1,10,1), (1,10,2), (1,11,1), (1,11,2), (1,2,1), (1,2,2),
(1,2,3);
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
CREATE TABLE t2 ( a INT NOT NULL, b INT NOT NULL, e INT );
INSERT INTO t2 VALUES ( 1,10,1), (1,10,2), (1,11,1), (1,11,2), (1,2,1), (1,2,2),(1,2,3);
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
t1 ON t2.a = t1.a AND t2.b = t1.b GROUP BY a, b, c;
a b c d
1 2 1 1
@ -2679,14 +2678,14 @@ a b c d
1 2 3 1
1 10 2
1 11 2
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
t1 ON t2.a = t1.a AND t2.b = t1.b GROUP BY t1.a, t1.b, c;
a b c d
1 10 4
1 2 1 1
1 2 2 1
1 2 3 1
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
t1 ON t2.a = t1.a AND t2.b = t1.b GROUP BY t2.a, t2.b, c;
a b c d
1 2 1 1
@ -2694,7 +2693,7 @@ a b c d
1 2 3 1
1 10 2
1 11 2
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2,t1
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2,t1
WHERE t2.a = t1.a AND t2.b = t1.b GROUP BY a, b, c;
a b c d
1 2 1 1

View file

@ -718,3 +718,73 @@ t1 CREATE TABLE `t1` (
KEY `a` (`a`,`b`,`d`,`e`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
drop table t1;
CREATE table t1 (a blob);
insert into t1 values ('b'),('a\0'),('a'),('a '),('aa'),(NULL);
select hex(a) from t1 order by a;
hex(a)
NULL
61
6100
6120
6161
62
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
b
NULL
6100
610000
612000
616100
6200
alter table t1 modify a varbinary(5);
select hex(a) from t1 order by a;
hex(a)
NULL
61
6100
6120
6161
62
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
b
NULL
6100
610000
612000
616100
6200
alter table t1 modify a char(5);
select hex(a) from t1 order by a;
hex(a)
NULL
6100
61
61
6161
62
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
b
NULL
610000
6100
6100
616100
6200
alter table t1 modify a binary(5);
select hex(a) from t1 order by a;
hex(a)
NULL
6100000000
6100000000
6100000000
6161000000
6200000000
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
b
NULL
610000000000
610000000000
610000000000
616100000000
620000000000
drop table t1;

View file

@ -7,7 +7,7 @@
#
--disable_warnings
drop table if exists t1,t2,t3,t4;
drop table if exists t1,t2,t3,t4,t11;
# The following may be left from older tests
drop table if exists t1_1,t1_2,t9_1,t9_2,t1aa,t2aa;
drop view if exists v1;
@ -2232,16 +2232,15 @@ drop table t1;
CREATE TABLE t1 ( a INT NOT NULL, b INT NOT NULL, UNIQUE idx (a,b) );
INSERT INTO t1 VALUES (1,1),(1,2),(1,3),(1,4);
CREATE TABLE t2 ( a INT NOT NULL, b INT NOT NULL, c INT );
INSERT INTO t2 VALUES ( 1,10,1), (1,10,2), (1,11,1), (1,11,2), (1,2,1), (1,2,2),
(1,2,3);
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
CREATE TABLE t2 ( a INT NOT NULL, b INT NOT NULL, e INT );
INSERT INTO t2 VALUES ( 1,10,1), (1,10,2), (1,11,1), (1,11,2), (1,2,1), (1,2,2),(1,2,3);
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
t1 ON t2.a = t1.a AND t2.b = t1.b GROUP BY a, b, c;
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
t1 ON t2.a = t1.a AND t2.b = t1.b GROUP BY t1.a, t1.b, c;
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2 LEFT JOIN
t1 ON t2.a = t1.a AND t2.b = t1.b GROUP BY t2.a, t2.b, c;
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',c) AS c, COUNT(*) AS d FROM t2,t1
SELECT t2.a, t2.b, IF(t1.b IS NULL,'',e) AS c, COUNT(*) AS d FROM t2,t1
WHERE t2.a = t1.a AND t2.b = t1.b GROUP BY a, b, c;
DROP TABLE IF EXISTS t1, t2;
@ -2635,31 +2634,18 @@ drop view v1, v2, v3;
# nested right join.
#
create table a (
id int(11) not null default '0'
) engine=myisam default charset=latin1;
insert into a values (123),(191),(192);
create table b (
id char(16) character set utf8 not null default ''
) engine=myisam default charset=latin1;
insert into b values ('58013'),('58014'),('58015'),('58016');
create table c (
a_id int(11) not null default '0',
b_id char(16) character set utf8 default null
) engine=myisam default charset=latin1;
insert into c values
(123,null),(123,null),(123,null),(123,null),(123,null),(123,'58013');
create table t1 (id int(11) not null default '0');
insert into t1 values (123),(191),(192);
create table t2 (id char(16) character set utf8 not null);
insert into t2 values ('58013'),('58014'),('58015'),('58016');
create table t3 (a_id int(11) not null, b_id char(16) character set utf8);
insert into t3 values (123,null),(123,null),(123,null),(123,null),(123,null),(123,'58013');
-- both queries are equivalent
select count(*)
from a inner join (c left join b on b.id = c.b_id) on a.id = c.a_id;
from t1 inner join (t3 left join t2 on t2.id = t3.b_id) on t1.id = t3.a_id;
select count(*)
from a inner join (b right join c on b.id = c.b_id) on a.id = c.a_id;
from t1 inner join (t2 right join t3 on t2.id = t3.b_id) on t1.id = t3.a_id;
drop table a, b, c;
drop table t1,t2,t3;

View file

@ -404,3 +404,22 @@ alter table t1 add primary key (a,b,c(255),d);
alter table t1 add key (a,b,d,e);
show create table t1;
drop table t1;
#
# Test that blob's and varbinary are sorted according to length
#
CREATE table t1 (a blob);
insert into t1 values ('b'),('a\0'),('a'),('a '),('aa'),(NULL);
select hex(a) from t1 order by a;
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
alter table t1 modify a varbinary(5);
select hex(a) from t1 order by a;
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
alter table t1 modify a char(5);
select hex(a) from t1 order by a;
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
alter table t1 modify a binary(5);
select hex(a) from t1 order by a;
select hex(concat(a,'\0')) as b from t1 order by concat(a,'\0');
drop table t1;

View file

@ -6402,6 +6402,17 @@ int Field_varstring::key_cmp(const byte *a,const byte *b)
void Field_varstring::sort_string(char *to,uint length)
{
uint tot_length= length_bytes == 1 ? (uint) (uchar) *ptr : uint2korr(ptr);
if (field_charset == &my_charset_bin)
{
/* Store length last in high-byte order to sort longer strings first */
if (length_bytes == 1)
to[length-1]= tot_length;
else
mi_int2store(to+length-2, tot_length);
length-= length_bytes;
}
tot_length= my_strnxfrm(field_charset,
(uchar*) to, length,
(uchar*) ptr + length_bytes,
@ -7093,6 +7104,13 @@ int Field_blob::key_cmp(const byte *a,const byte *b)
}
uint32 Field_blob::sort_length() const
{
return (uint32) (current_thd->variables.max_sort_length +
(field_charset == &my_charset_bin ? 0 : packlength));
}
void Field_blob::sort_string(char *to,uint length)
{
char *blob;
@ -7102,6 +7120,31 @@ void Field_blob::sort_string(char *to,uint length)
bzero(to,length);
else
{
if (field_charset == &my_charset_bin)
{
char *pos;
/*
Store length of blob last in blob to shorter blobs before longer blobs
*/
length-= packlength;
pos= to+length;
switch (packlength) {
case 1:
*pos= (char) blob_length;
break;
case 2:
mi_int2store(pos, blob_length);
break;
case 3:
mi_int3store(pos, blob_length);
break;
case 4:
mi_int4store(pos, blob_length);
break;
}
}
memcpy_fixed(&blob,ptr+packlength,sizeof(char*));
blob_length=my_strnxfrm(field_charset,

View file

@ -132,6 +132,7 @@ public:
virtual bool eq_def(Field *field);
virtual uint32 pack_length() const { return (uint32) field_length; }
virtual uint32 pack_length_in_rec() const { return pack_length(); }
virtual uint32 sort_length() const { return pack_length(); }
virtual void reset(void) { bzero(ptr,pack_length()); }
virtual void reset_fields() {}
virtual void set_default()
@ -1048,6 +1049,11 @@ public:
void reset(void) { bzero(ptr,field_length+length_bytes); }
uint32 pack_length() const { return (uint32) field_length+length_bytes; }
uint32 key_length() const { return (uint32) field_length; }
uint32 sort_length() const
{
return (uint32) field_length + (field_charset == &my_charset_bin ?
length_bytes : 0);
}
int store(const char *to,uint length,CHARSET_INFO *charset);
int store(longlong nr, bool unsigned_val);
int store(double nr) { return Field_str::store(nr); } /* QQ: To be deleted */
@ -1120,6 +1126,7 @@ public:
void sort_string(char *buff,uint length);
uint32 pack_length() const
{ return (uint32) (packlength+table->s->blob_ptr_size); }
uint32 sort_length() const;
inline uint32 max_data_length() const
{
return (uint32) (((ulonglong) 1 << (packlength*8)) -1);

View file

@ -50,7 +50,8 @@ static int merge_index(SORTPARAM *param,uchar *sort_buffer,
IO_CACHE *outfile);
static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count,
FILESORT_INFO *table_sort);
static uint sortlength(SORT_FIELD *sortorder, uint s_length,
static uint suffix_length(ulong string_length);
static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
bool *multi_byte_charset);
static SORT_ADDON_FIELD *get_addon_fields(THD *thd, Field **ptabfield,
uint sortlength, uint *plength);
@ -123,7 +124,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
sort_keys= (uchar **) NULL;
error= 1;
bzero((char*) &param,sizeof(param));
param.sort_length= sortlength(sortorder, s_length, &multi_byte_charset);
param.sort_length= sortlength(thd, sortorder, s_length, &multi_byte_charset);
param.ref_length= table->file->ref_length;
param.addon_field= 0;
param.addon_length= 0;
@ -585,6 +586,28 @@ err:
} /* write_keys */
/*
Store length as suffix in high-byte-first order
*/
static inline void store_length(uchar *to, uint length, uint pack_length)
{
switch (pack_length) {
case 1:
*to= (uchar) length;
break;
case 2:
mi_int2store(to, length);
break;
case 3:
mi_int3store(to, length);
default:
mi_int4store(to, length);
break;
}
}
/* makes a sort-key from record */
static void make_sortkey(register SORTPARAM *param,
@ -623,9 +646,11 @@ static void make_sortkey(register SORTPARAM *param,
maybe_null= item->maybe_null;
switch (sort_field->result_type) {
case STRING_RESULT:
{
{
CHARSET_INFO *cs=item->collation.collation;
char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
int diff;
uint sort_field_length;
if (maybe_null)
*to++=1;
@ -644,24 +669,32 @@ static void make_sortkey(register SORTPARAM *param,
}
break;
}
length=res->length();
int diff=(int) (sort_field->length-length);
length= res->length();
sort_field_length= sort_field->length - sort_field->suffix_length;
diff=(int) (sort_field_length - length);
if (diff < 0)
{
diff=0; /* purecov: inspected */
length=sort_field->length;
length= sort_field_length;
}
if (sort_field->suffix_length)
{
/* Store length last in result_string */
store_length(to + sort_field_length, length,
sort_field->suffix_length);
}
if (sort_field->need_strxnfrm)
{
char *from=(char*) res->ptr();
uint tmp_length;
if ((unsigned char *)from == to)
{
set_if_smaller(length,sort_field->length);
memcpy(param->tmp_buffer,from,length);
from=param->tmp_buffer;
}
uint tmp_length=my_strnxfrm(cs,to,sort_field->length,
(unsigned char *) from, length);
tmp_length= my_strnxfrm(cs,to,sort_field->length,
(unsigned char *) from, length);
DBUG_ASSERT(tmp_length == sort_field->length);
}
else
@ -670,7 +703,7 @@ static void make_sortkey(register SORTPARAM *param,
cs->cset->fill(cs, (char *)to+length,diff,fill_char);
}
break;
}
}
case INT_RESULT:
{
longlong value= item->val_int_result();
@ -1170,11 +1203,25 @@ static int merge_index(SORTPARAM *param, uchar *sort_buffer,
} /* merge_index */
static uint suffix_length(ulong string_length)
{
if (string_length < 256)
return 1;
if (string_length < 256L*256L)
return 2;
if (string_length < 256L*256L*256L)
return 3;
return 4; // Can't sort longer than 4G
}
/*
Calculate length of sort key
SYNOPSIS
sortlength()
thd Thread handler
sortorder Order of items to sort
uint s_length Number of items to sort
multi_byte_charset (out)
@ -1190,10 +1237,10 @@ static int merge_index(SORTPARAM *param, uchar *sort_buffer,
*/
static uint
sortlength(SORT_FIELD *sortorder, uint s_length, bool *multi_byte_charset)
sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
bool *multi_byte_charset)
{
reg2 uint length;
THD *thd= current_thd;
CHARSET_INFO *cs;
*multi_byte_charset= 0;
@ -1201,19 +1248,17 @@ sortlength(SORT_FIELD *sortorder, uint s_length, bool *multi_byte_charset)
for (; s_length-- ; sortorder++)
{
sortorder->need_strxnfrm= 0;
sortorder->suffix_length= 0;
if (sortorder->field)
{
if (sortorder->field->type() == FIELD_TYPE_BLOB)
sortorder->length= thd->variables.max_sort_length;
else
cs= sortorder->field->sort_charset();
sortorder->length= sortorder->field->sort_length();
if (use_strnxfrm((cs=sortorder->field->sort_charset())))
{
sortorder->length=sortorder->field->pack_length();
if (use_strnxfrm((cs=sortorder->field->sort_charset())))
{
sortorder->need_strxnfrm= 1;
*multi_byte_charset= 1;
sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
}
sortorder->need_strxnfrm= 1;
*multi_byte_charset= 1;
sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
}
if (sortorder->field->maybe_null())
length++; // Place for NULL marker
@ -1229,6 +1274,12 @@ sortlength(SORT_FIELD *sortorder, uint s_length, bool *multi_byte_charset)
sortorder->need_strxnfrm= 1;
*multi_byte_charset= 1;
}
else if (cs == &my_charset_bin)
{
/* Store length last to be able to sort blob/varbinary */
sortorder->suffix_length= suffix_length(sortorder->length);
sortorder->length+= sortorder->suffix_length;
}
break;
case INT_RESULT:
#if SIZEOF_LONG_LONG > 4

View file

@ -1899,6 +1899,7 @@ typedef struct st_sort_field {
Field *field; /* Field to sort */
Item *item; /* Item if not sorting fields */
uint length; /* Length of sort field */
uint suffix_length; /* Length suffix (0-4) */
Item_result result_type; /* Type of item */
bool reverse; /* if descending sort */
bool need_strxnfrm; /* If we have to use strxnfrm() */

View file

@ -11410,7 +11410,7 @@ static int remove_dup_with_hash_index(THD *thd, TABLE *table,
ulong total_length= 0;
for (ptr= first_field, field_length=field_lengths ; *ptr ; ptr++)
{
uint length= (*ptr)->pack_length();
uint length= (*ptr)->sort_length();
(*field_length++)= length;
total_length+= length;
}

View file

@ -373,29 +373,14 @@ static int my_wildcmp_bin(CHARSET_INFO *cs,
}
uint my_strnxfrmlen_bin(CHARSET_INFO *cs __attribute__((unused)), uint len)
{
return len + 2;
}
static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
uchar * dest, uint dstlen,
const uchar *src, uint srclen)
{
if (dest != src)
memcpy(dest, src, min(dstlen,srclen));
if (dstlen >= srclen + 2)
{
if (dstlen > srclen + 2)
bfill(dest + srclen, dstlen - srclen - 2, 0);
dest[dstlen-2]= srclen >> 8;
dest[dstlen-1]= srclen & 0xFF;
}
else if (dstlen > srclen)
if (dstlen > srclen)
bfill(dest + srclen, dstlen - srclen, 0);
return dstlen;
}
@ -496,7 +481,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
my_strnncoll_binary,
my_strnncollsp_binary,
my_strnxfrm_bin,
my_strnxfrmlen_bin,
my_strnxfrmlen_simple,
my_like_range_simple,
my_wildcmp_bin,
my_strcasecmp_bin,
@ -539,7 +524,7 @@ static MY_CHARSET_HANDLER my_charset_handler=
CHARSET_INFO my_charset_bin =
{
63,0,0, /* number */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY|MY_CS_STRNXFRM,/* state */
MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_PRIMARY,/* state */
"binary", /* cs name */
"binary", /* name */
"", /* comment */