mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
my_strnncoll_latin1_de rewritten from scratch. Bug with overlapping combos went away.
mysql-test/r/ctype_latin1_de.result: testing overlapping combos mysql-test/t/ctype_latin1_de.test: testing overlapping combos
This commit is contained in:
parent
47d9c95f9c
commit
9942e0c7c6
3 changed files with 56 additions and 108 deletions
|
@ -196,6 +196,9 @@ strcmp('
|
|||
select strcmp('u','öa'),strcmp('u','ö');
|
||||
strcmp('u','öa') strcmp('u','ö')
|
||||
1 1
|
||||
select strcmp('sä', 'ßa'), strcmp('aä', 'äx');
|
||||
strcmp('sä', 'ßa') strcmp('aä', 'äx')
|
||||
-1 -1
|
||||
create table t1 (a varchar(10), key(a), fulltext (a));
|
||||
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
|
||||
select * from t1 where a like "abc%";
|
||||
|
|
|
@ -34,6 +34,10 @@ select strcmp('af','
|
|||
select strcmp('ßa','ss'),strcmp('ssa','ß'),strcmp('sssb','sßa'),strcmp('ß','s');
|
||||
select strcmp('u','öa'),strcmp('u','ö');
|
||||
|
||||
#
|
||||
# overlapping combos
|
||||
#
|
||||
select strcmp('sä', 'ßa'), strcmp('aä', 'äx');
|
||||
#
|
||||
# Some other simple tests with the current character set
|
||||
#
|
||||
|
|
|
@ -102,6 +102,7 @@ uchar to_upper_latin1_de[] = {
|
|||
* Ü, ü, Ö, ö, Ä, ä
|
||||
*/
|
||||
|
||||
/* QQ: why is it necessary? */
|
||||
uchar sort_order_latin1_de[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
|
@ -121,6 +122,41 @@ uchar sort_order_latin1_de[] = {
|
|||
68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
|
||||
};
|
||||
|
||||
|
||||
/* same as sort_order_latin1_de, but maps ALL accented chars to unaccented ones */
|
||||
uchar combo1map[]={
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
|
||||
65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
|
||||
68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
|
||||
65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
|
||||
68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
|
||||
};
|
||||
|
||||
uchar combo2map[]={
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0,83, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
/* no longer needed
|
||||
#define L1_AE 196
|
||||
#define L1_ae 228
|
||||
#define L1_OE 214
|
||||
|
@ -128,7 +164,7 @@ uchar sort_order_latin1_de[] = {
|
|||
#define L1_UE 220
|
||||
#define L1_ue 252
|
||||
#define L1_ss 223
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
Some notes about the following comparison rules:
|
||||
|
@ -143,95 +179,23 @@ uchar sort_order_latin1_de[] = {
|
|||
sort_order_latin1_de[] at all
|
||||
*/
|
||||
|
||||
|
||||
#define CHECK_S1_COMBO(ch1, ch2, str1, str1_end, res_if_str1_smaller, str2, fst, snd, accent) \
|
||||
/* Invariant: ch1 == fst == sort_order_latin1_de[accent] && ch1 != ch2 */ \
|
||||
if (ch2 != accent) \
|
||||
{ \
|
||||
ch1= fst; \
|
||||
goto normal; \
|
||||
} \
|
||||
if (str1 == str1_end) \
|
||||
return res_if_str1_smaller; \
|
||||
{ \
|
||||
int diff = (int) sort_order_latin1_de[*str1] - snd; \
|
||||
if (diff) \
|
||||
return diff*(-(res_if_str1_smaller)); \
|
||||
/* They are equal (e.g., "Ae" == 'ä') */ \
|
||||
str1++; \
|
||||
}
|
||||
|
||||
|
||||
int my_strnncoll_latin1_de(const uchar * s1, int len1,
|
||||
const uchar * s2, int len2)
|
||||
{
|
||||
const uchar *e1 = s1 + len1;
|
||||
const uchar *e2 = s2 + len2;
|
||||
uchar c1, c12=0, c2, c22=0;
|
||||
|
||||
while (s1 < e1 && s2 < e2)
|
||||
while ((s1 < e1 || c12) && (s2 < e2 || c22))
|
||||
{
|
||||
/*
|
||||
Because sort_order_latin1_de doesn't convert 'Ä', Ü or ß we
|
||||
can use it here.
|
||||
*/
|
||||
uchar c1 = sort_order_latin1_de[*s1++];
|
||||
uchar c2 = sort_order_latin1_de[*s2++];
|
||||
if (c1 != c2)
|
||||
{
|
||||
switch (c1) {
|
||||
case 'A':
|
||||
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'A', 'E', L1_AE);
|
||||
break;
|
||||
case 'O':
|
||||
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'O', 'E', L1_OE);
|
||||
break;
|
||||
case 'U':
|
||||
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'U', 'E', L1_UE);
|
||||
break;
|
||||
case 'S':
|
||||
CHECK_S1_COMBO(c1, c2, s1, e1, -1, s2, 'S', 'S', L1_ss);
|
||||
break;
|
||||
case L1_AE:
|
||||
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'A', 'E', 'A');
|
||||
break;
|
||||
case L1_OE:
|
||||
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'O', 'E', 'O');
|
||||
break;
|
||||
case L1_UE:
|
||||
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'U', 'E', 'U');
|
||||
break;
|
||||
case L1_ss:
|
||||
CHECK_S1_COMBO(c1, c2, s2, e2, 1, s1, 'S', 'S', 'S');
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
Handle the case where 'c2' is a special character
|
||||
If this is true, we know that c1 can't match this character.
|
||||
*/
|
||||
normal:
|
||||
switch (c2) {
|
||||
case L1_AE:
|
||||
return (int) c1 - (int) 'A';
|
||||
case L1_OE:
|
||||
return (int) c1 - (int) 'O';
|
||||
case L1_UE:
|
||||
return (int) c1 - (int) 'U';
|
||||
case L1_ss:
|
||||
return (int) c1 - (int) 'S';
|
||||
default:
|
||||
{
|
||||
int diff= (int) c1 - (int) c2;
|
||||
if (diff)
|
||||
return diff;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (c12) { c1=c12; c12=0; } else { c12=combo2map[*s1]; c1=combo1map[*s1++]; }
|
||||
if (c22) { c2=c22; c22=0; } else { c22=combo2map[*s2]; c2=combo1map[*s2++]; }
|
||||
if (c1 != c2) return (int)c1 - (int)c2;
|
||||
}
|
||||
|
||||
/* A simple test of string lengths won't work -- we test to see
|
||||
* which string ran out first */
|
||||
return s1 < e1 ? 1 : s2 < e2 ? -1 : 0;
|
||||
return (s1 < e1 || c12)? 1 : (s2 < e2 || c22)? -1 : 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -240,35 +204,12 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
|
|||
const uchar *dest_orig = dest;
|
||||
const uchar *de = dest + len;
|
||||
const uchar *se = src + srclen;
|
||||
while (src < se && dest < de)
|
||||
for ( ; src < se && dest < de ; src++)
|
||||
{
|
||||
uchar chr=sort_order_latin1_de[*src];
|
||||
switch (chr) {
|
||||
case L1_AE:
|
||||
*dest++ = 'A';
|
||||
if (dest < de)
|
||||
*dest++ = 'E';
|
||||
break;
|
||||
case L1_OE:
|
||||
*dest++ = 'O';
|
||||
if (dest < de)
|
||||
*dest++ = 'E';
|
||||
break;
|
||||
case L1_UE:
|
||||
*dest++ = 'U';
|
||||
if (dest < de)
|
||||
*dest++ = 'E';
|
||||
break;
|
||||
case L1_ss:
|
||||
*dest++ = 'S';
|
||||
if (dest < de)
|
||||
*dest++ = 'S';
|
||||
break;
|
||||
default:
|
||||
*dest++= chr;
|
||||
break;
|
||||
}
|
||||
++src;
|
||||
uchar chr=combo1map[*src];
|
||||
*dest++=chr;
|
||||
if ((chr=combo2map[*src]) && dest < de)
|
||||
*dest++=chr;
|
||||
}
|
||||
return dest - dest_orig;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue