A cleanup for MDEV-27896 Wrong result upon COLLATE latin1_bin CHARACTER SET latin1 on the table or the database level

Changing the error messages in a statement like this:

CREATE DATABASE db1
         COLLATE utf8mb4_bin
         CHARACTER SET utf8mb4
         CHARACTER SET latin1;

from
  COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'latin1'

to a more expected:

  Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER SET latin1'

In order to do this:
- Adding a new type TYPE_CHARACTER_SET_COLLATE_EXACT into
  Lex_exact_charset_extended_collation_attrs_st

- Removing m_had_charset_exact from its descendant class
  Lex_extended_charset_extended_collation_attrs_st

Additional cleanup:
- Changing the methods in Lex_exact_charset_extended_collation_attrs_st
  set_charset(), set_charset_collate_default(), set_charset_collate_binary()
  to get Lex_exact_charset instead of CHARSET_INFO as a parameter,
  to guarantee that the argument is only a CHARACTER SET and does not have
  any COLLATE clauses yet. This change is not directly related to
  the error message change.
This commit is contained in:
Alexander Barkov 2022-05-25 11:07:04 +04:00
parent 1ace1075dc
commit e9adc3959e
5 changed files with 86 additions and 61 deletions

View file

@ -654,11 +654,11 @@ result ERROR: Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER S
query CREATE DATABASE
attrs COLLATE DEFAULT CHARACTER SET utf8mb4 CHARACTER SET latin1
result ERROR: COLLATION 'utf8mb4_general_ci' is not valid for CHARACTER SET 'latin1'
result ERROR: Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER SET latin1'
query CREATE DATABASE
attrs COLLATE utf8mb4_bin CHARACTER SET utf8mb4 CHARACTER SET latin1
result ERROR: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'latin1'
result ERROR: Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER SET latin1'
query CREATE DATABASE
attrs COLLATE latin1_swedish_ci CHARACTER SET utf8mb4 CHARACTER SET latin1
@ -674,7 +674,7 @@ result ERROR: Conflicting declarations: 'CHARACTER SET DEFAULT (utf8mb4)' and 'C
query CREATE DATABASE
attrs CHARACTER SET utf8mb4 COLLATE DEFAULT CHARACTER SET latin1
result ERROR: COLLATION 'utf8mb4_general_ci' is not valid for CHARACTER SET 'latin1'
result ERROR: Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER SET latin1'
query CREATE DATABASE
attrs COLLATE utf8mb4_bin COLLATE DEFAULT CHARACTER SET latin1
@ -694,7 +694,7 @@ result ERROR: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'latin1'
query CREATE DATABASE
attrs CHARACTER SET utf8mb4 COLLATE utf8mb4_bin CHARACTER SET latin1
result ERROR: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'latin1'
result ERROR: Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER SET latin1'
query CREATE DATABASE
attrs COLLATE DEFAULT COLLATE utf8mb4_bin CHARACTER SET latin1
@ -774,7 +774,7 @@ result ERROR: Conflicting declarations: 'CHARACTER SET utf8mb4' and 'CHARACTER S
query CREATE DATABASE
attrs COLLATE DEFAULT CHARACTER SET latin1 CHARACTER SET utf8mb4
result ERROR: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'utf8mb4'
result ERROR: Conflicting declarations: 'CHARACTER SET latin1' and 'CHARACTER SET utf8mb4'
query CREATE DATABASE
attrs COLLATE utf8mb4_bin CHARACTER SET latin1 CHARACTER SET utf8mb4
@ -782,11 +782,11 @@ result ERROR: COLLATION 'utf8mb4_bin' is not valid for CHARACTER SET 'latin1'
query CREATE DATABASE
attrs COLLATE latin1_swedish_ci CHARACTER SET latin1 CHARACTER SET utf8mb4
result ERROR: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'utf8mb4'
result ERROR: Conflicting declarations: 'CHARACTER SET latin1' and 'CHARACTER SET utf8mb4'
query CREATE DATABASE
attrs COLLATE latin1_bin CHARACTER SET latin1 CHARACTER SET utf8mb4
result ERROR: COLLATION 'latin1_bin' is not valid for CHARACTER SET 'utf8mb4'
result ERROR: Conflicting declarations: 'CHARACTER SET latin1' and 'CHARACTER SET utf8mb4'
query CREATE DATABASE
attrs CHARACTER SET latin1 CHARACTER SET utf8mb4 CHARACTER SET utf8mb4
@ -802,7 +802,7 @@ result ERROR: COLLATION 'latin1_bin' is not valid for CHARACTER SET 'utf8mb4'
query CREATE DATABASE
attrs CHARACTER SET latin1 COLLATE DEFAULT CHARACTER SET utf8mb4
result ERROR: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'utf8mb4'
result ERROR: Conflicting declarations: 'CHARACTER SET latin1' and 'CHARACTER SET utf8mb4'
query CREATE DATABASE
attrs COLLATE utf8mb4_bin COLLATE DEFAULT CHARACTER SET utf8mb4
@ -838,7 +838,7 @@ result ERROR: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'utf8
query CREATE DATABASE
attrs CHARACTER SET latin1 COLLATE latin1_swedish_ci CHARACTER SET utf8mb4
result ERROR: COLLATION 'latin1_swedish_ci' is not valid for CHARACTER SET 'utf8mb4'
result ERROR: Conflicting declarations: 'CHARACTER SET latin1' and 'CHARACTER SET utf8mb4'
query CREATE DATABASE
attrs CHARACTER SET utf8mb4 COLLATE latin1_swedish_ci CHARACTER SET utf8mb4
@ -866,7 +866,7 @@ result ERROR: COLLATION 'latin1_bin' is not valid for CHARACTER SET 'utf8mb4'
query CREATE DATABASE
attrs CHARACTER SET latin1 COLLATE latin1_bin CHARACTER SET utf8mb4
result ERROR: COLLATION 'latin1_bin' is not valid for CHARACTER SET 'utf8mb4'
result ERROR: Conflicting declarations: 'CHARACTER SET latin1' and 'CHARACTER SET utf8mb4'
query CREATE DATABASE
attrs CHARACTER SET utf8mb4 COLLATE latin1_bin CHARACTER SET utf8mb4

View file

@ -72,6 +72,20 @@ bool Lex_exact_charset::
}
/*
  Check that an explicit CHARACTER SET clause agrees with the character set
  already stored in this object (m_ci).

  @param cs  - the exact character set from the other CHARACTER SET clause

  @retval false - my_charset_same() reports that m_ci and cs refer to
                  the same character set
  @retval true  - the character sets differ; ER_CONFLICTING_DECLARATIONS
                  has been raised naming both, e.g.:
                  Conflicting declarations:
                  'CHARACTER SET utf8mb4' and 'CHARACTER SET latin1'
*/
bool Lex_exact_charset_opt_extended_collate::
raise_if_charsets_differ(const Lex_exact_charset &cs) const
{
if (!my_charset_same(m_ci, cs.charset_info()))
{
// Report the conflict in terms of the two CHARACTER SET names,
// with the character set of this object first and the argument second.
my_error(ER_CONFLICTING_DECLARATIONS, MYF(0),
"CHARACTER SET ", m_ci->cs_name.str,
"CHARACTER SET ", cs.charset_info()->cs_name.str);
return true;
}
return false;
}
bool Lex_exact_charset_opt_extended_collate::
raise_if_not_applicable(const Lex_exact_collation &cl) const
{
@ -386,8 +400,7 @@ CHARSET_INFO *Lex_exact_charset_extended_collation_attrs_st::
case TYPE_EMPTY:
return def;
case TYPE_CHARACTER_SET:
DBUG_ASSERT(m_ci);
return m_ci;
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
DBUG_ASSERT(m_ci);
return m_ci;
@ -418,11 +431,13 @@ bool Lex_exact_charset_extended_collation_attrs_st::
case TYPE_CHARACTER_SET:
{
// CHARACTER SET latin1 .. COLLATE latin1_swedish_ci
if (Lex_exact_charset(m_ci).raise_if_not_applicable(cl))
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
if (tmp.merge_exact_collation(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(cl);
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
{
// [CHARACTER SET latin1] COLLATE latin1_bin .. COLLATE latin1_bin
@ -460,9 +475,10 @@ bool Lex_exact_charset_extended_collation_attrs_st::
Lex_exact_charset_opt_extended_collate tmp(m_ci, false);
if (tmp.merge_context_collation(cl))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp.collation());
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
// [CHARACTER SET latin1] COLLATE latin1_swedish_ci .. COLLATE DEFAULT
return Lex_exact_collation(m_ci).
@ -538,7 +554,15 @@ bool Lex_exact_charset_extended_collation_attrs_st::
case TYPE_COLLATE_EXACT:
// COLLATE latin1_bin .. CHARACTER SET cs
return cs.raise_if_not_applicable(Lex_exact_collation(m_ci));
if (cs.raise_if_not_applicable(Lex_exact_collation(m_ci)))
return true;
m_type= TYPE_CHARACTER_SET_COLLATE_EXACT;
return false;
case TYPE_CHARACTER_SET_COLLATE_EXACT:
// CHARACTER SET cs1 COLLATE cl .. CHARACTER SET cs2
return Lex_exact_charset_opt_extended_collate(m_ci, true).
raise_if_charsets_differ(cs);
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
// COLLATE DEFAULT .. CHARACTER SET cs
@ -546,7 +570,7 @@ bool Lex_exact_charset_extended_collation_attrs_st::
Lex_exact_charset_opt_extended_collate tmp(cs);
if (tmp.merge_context_collation(Lex_context_collation(m_ci)))
return true;
*this= Lex_exact_charset_extended_collation_attrs(tmp.collation());
*this= Lex_exact_charset_extended_collation_attrs(tmp);
return false;
}
}
@ -567,7 +591,6 @@ bool Lex_extended_charset_extended_collation_attrs_st::merge_charset_default()
bool Lex_extended_charset_extended_collation_attrs_st::
merge_exact_charset(const Lex_exact_charset &cs)
{
m_had_charset_exact= true;
if (m_charset_order == CHARSET_TYPE_EMPTY)
m_charset_order= CHARSET_TYPE_EXACT;
return Lex_exact_charset_extended_collation_attrs_st::merge_exact_charset(cs);
@ -621,6 +644,7 @@ Lex_extended_charset_extended_collation_attrs_st::
}
return m_ci;
case TYPE_CHARACTER_SET_COLLATE_EXACT:
case TYPE_COLLATE_EXACT:
{
/*
@ -629,7 +653,7 @@ Lex_extended_charset_extended_collation_attrs_st::
[ CHARACTER SET cs_exact ]
CHARACTER SET DEFAULT;
*/
if (m_had_charset_exact &&
if (m_type == TYPE_CHARACTER_SET_COLLATE_EXACT &&
raise_if_charset_conflicts_with_default(ctx.charset_default()))
{
/*

View file

@ -203,6 +203,7 @@ public:
bool with_collate() const { return m_with_collate; }
CHARSET_INFO *find_bin_collation() const;
CHARSET_INFO *find_default_collation() const;
bool raise_if_charsets_differ(const Lex_exact_charset &cs) const;
bool raise_if_not_applicable(const Lex_exact_collation &cl) const;
/*
Add another COLLATE clause (exact or context).
@ -281,14 +282,17 @@ public:
TYPE_EMPTY= 0,
TYPE_CHARACTER_SET= 1,
TYPE_COLLATE_EXACT= 2,
TYPE_COLLATE_CONTEXTUALLY_TYPED= 3
TYPE_CHARACTER_SET_COLLATE_EXACT= 3,
TYPE_COLLATE_CONTEXTUALLY_TYPED= 4
};
// Number of bits required to store enum Type values
#define LEX_CHARSET_COLLATION_TYPE_BITS 2
static_assert(((1<<LEX_CHARSET_COLLATION_TYPE_BITS)-1) >=
TYPE_COLLATE_CONTEXTUALLY_TYPED,
#define LEX_CHARSET_COLLATION_TYPE_BITS 3
#define LEX_CHARSET_COLLATION_TYPE_MASK ((1<<LEX_CHARSET_COLLATION_TYPE_BITS)-1)
static_assert(LEX_CHARSET_COLLATION_TYPE_MASK >=
TYPE_COLLATE_CONTEXTUALLY_TYPED,
"Lex_exact_charset_extended_collation_attrs_st::Type bits");
protected:
@ -330,37 +334,37 @@ public:
}
void init(const Lex_exact_charset_opt_extended_collate &cscl)
{
cscl.with_collate() ? init(cscl.collation()) :
init(cscl.charset());
if (cscl.with_collate())
init(cscl.collation().charset_info(), TYPE_CHARACTER_SET_COLLATE_EXACT);
else
init(cscl.charset());
}
bool is_empty() const
{
return m_type == TYPE_EMPTY;
}
void set_charset(CHARSET_INFO *cs)
void set_charset(const Lex_exact_charset &cs)
{
DBUG_ASSERT(cs);
m_ci= cs;
m_ci= cs.charset_info();
m_type= TYPE_CHARACTER_SET;
}
bool set_charset_collate_default(CHARSET_INFO *cs)
bool set_charset_collate_default(const Lex_exact_charset &cs)
{
DBUG_ASSERT(cs);
if (!(cs= Lex_exact_charset_opt_extended_collate(cs, true).
CHARSET_INFO *ci;
if (!(ci= Lex_exact_charset_opt_extended_collate(cs).
find_default_collation()))
return true;
m_ci= cs;
m_type= TYPE_COLLATE_EXACT;
m_ci= ci;
m_type= TYPE_CHARACTER_SET_COLLATE_EXACT;
return false;
}
bool set_charset_collate_binary(CHARSET_INFO *cs)
bool set_charset_collate_binary(const Lex_exact_charset &cs)
{
DBUG_ASSERT(cs);
if (!(cs= Lex_exact_charset_opt_extended_collate(cs, true).
find_bin_collation()))
CHARSET_INFO *ci;
if (!(ci= Lex_exact_charset_opt_extended_collate(cs).find_bin_collation()))
return true;
m_ci= cs;
m_type= TYPE_COLLATE_EXACT;
m_ci= ci;
m_type= TYPE_CHARACTER_SET_COLLATE_EXACT;
return false;
}
void set_collate_default()
@ -407,6 +411,7 @@ public:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
case TYPE_CHARACTER_SET:
case TYPE_CHARACTER_SET_COLLATE_EXACT:
break;
}
DBUG_ASSERT(0);
@ -430,6 +435,7 @@ public:
return merge_exact_collation(Lex_exact_collation(cl.charset_info()));
case TYPE_COLLATE_CONTEXTUALLY_TYPED:
case TYPE_CHARACTER_SET:
case TYPE_CHARACTER_SET_COLLATE_EXACT:
break;
}
DBUG_ASSERT(0);
@ -527,33 +533,18 @@ class Lex_extended_charset_extended_collation_attrs_st:
order of CHARACTER SET clauses in case of conflicts.
*/
charset_type_t m_charset_order;
/*
The parent class Lex_exact_charset_extended_collation_attrs_st
does not let know if a "COLLATE cl_exact" was used in combination with
"CHARACTER SET cs_exact" or just alone.
Here we need to distinguish:
- CHARACTER SET cs_exact COLLATE cl_exact, or
- COLLATE cl_exact CHARACTER SET cs_exact
versus just:
- COLLATE cl_exact
to produce better error messages in case of conflicts.
So let's add a flag member:
*/
bool m_had_charset_exact;
public:
void init()
{
Lex_opt_context_charset_st::init();
Lex_exact_charset_extended_collation_attrs_st::init();
m_charset_order= CHARSET_TYPE_EMPTY;
m_had_charset_exact= false;
}
void init(const Lex_exact_charset_opt_extended_collate &c)
{
Lex_opt_context_charset_st::init();
Lex_exact_charset_extended_collation_attrs_st::init(c);
m_charset_order= CHARSET_TYPE_EXACT;
m_had_charset_exact= true;
}
bool is_empty() const
{

View file

@ -6506,22 +6506,28 @@ opt_binary:
;
binary:
BYTE_SYM { $$.set_charset(&my_charset_bin); }
| charset_or_alias { $$.set_charset($1); }
BYTE_SYM
{
$$.set_charset(Lex_exact_charset(&my_charset_bin));
}
| charset_or_alias
{
$$.set_charset(Lex_exact_charset($1));
}
| charset_or_alias BINARY
{
if ($$.set_charset_collate_binary($1))
if ($$.set_charset_collate_binary(Lex_exact_charset($1)))
MYSQL_YYABORT;
}
| BINARY { $$.set_contextually_typed_binary_style(); }
| BINARY charset_or_alias
{
if ($$.set_charset_collate_binary($2))
if ($$.set_charset_collate_binary(Lex_exact_charset($2)))
MYSQL_YYABORT;
}
| charset_or_alias COLLATE_SYM DEFAULT
{
$$.set_charset_collate_default($1);
$$.set_charset_collate_default(Lex_exact_charset($1));
}
| charset_or_alias COLLATE_SYM collation_name
{

View file

@ -704,7 +704,9 @@ public:
m_handler= handler;
m_ci= coll.charset_info();
Lex_length_and_dec_st::operator=(length_and_dec);
m_collation_type= ((uint8) coll.type()) & 0x3;
// Using bit-and to avoid the warning:
// conversion from uint8 to unsigned char:3 may change value
m_collation_type= ((uint8) coll.type()) & LEX_CHARSET_COLLATION_TYPE_MASK;
}
void set(const Type_handler *handler,
const Lex_column_charset_collation_attrs_st &coll)
@ -712,7 +714,9 @@ public:
m_handler= handler;
m_ci= coll.charset_info();
Lex_length_and_dec_st::reset();
m_collation_type= ((uint8) coll.type()) & 0x3;
// Using bit-and to avoid the warning:
// conversion from uint8 to unsigned char:3 may change value
m_collation_type= ((uint8) coll.type()) & LEX_CHARSET_COLLATION_TYPE_MASK;
}
void set(const Type_handler *handler, CHARSET_INFO *cs= NULL)
{