mirror of
https://github.com/MariaDB/server.git
synced 2025-01-28 01:34:17 +01:00
144 lines
3.9 KiB
C
144 lines
3.9 KiB
C
|
/*
|
||
|
Copyright (c) 2023, MariaDB Corporation.
|
||
|
|
||
|
This program is free software; you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation; version 2 of the License.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with this program; if not, write to the Free Software
|
||
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
||
|
|
||
|
#ifndef CSET_NARROWING_H_INCLUDED
|
||
|
#define CSET_NARROWING_H_INCLUDED
|
||
|
|
||
|
/*
|
||
|
A singleton class to provide "utf8mb3_from_mb4.charset()".
|
||
|
|
||
|
This is a variant of utf8mb3_general_ci that one can use when they have data
|
||
|
in MB4 and want to make index lookup keys in MB3.
|
||
|
*/
|
||
|
extern
|
||
|
class Charset_utf8narrow
|
||
|
{
|
||
|
struct my_charset_handler_st cset_handler;
|
||
|
struct charset_info_st cset;
|
||
|
public:
|
||
|
Charset_utf8narrow() :
|
||
|
cset_handler(*my_charset_utf8mb3_general_ci.cset),
|
||
|
cset(my_charset_utf8mb3_general_ci) /* Copy the CHARSET_INFO structure */
|
||
|
{
|
||
|
/* Insert our function wc_mb */
|
||
|
cset_handler.wc_mb= my_wc_mb_utf8mb4_bmp_only;
|
||
|
cset.cset=&cset_handler;
|
||
|
|
||
|
/* Charsets are compared by their name, so assign a different name */
|
||
|
LEX_CSTRING tmp= {STRING_WITH_LEN("utf8_mb4_to_mb3")};
|
||
|
cset.cs_name= tmp;
|
||
|
}
|
||
|
|
||
|
CHARSET_INFO *charset() { return &cset; }
|
||
|
|
||
|
} utf8mb3_from_mb4;
|
||
|
|
||
|
|
||
|
/*
|
||
|
A class to temporary change a field that uses utf8mb3_general_ci to enable
|
||
|
correct lookup key construction from string value in utf8mb4_general_ci
|
||
|
|
||
|
Intended usage:
|
||
|
|
||
|
// can do this in advance:
|
||
|
bool do_narrowing= Utf8_narrow::should_do_narrowing(field, value_cset);
|
||
|
...
|
||
|
|
||
|
// This sets the field to do narrowing if necessary:
|
||
|
Utf8_narrow narrow(field, do_narrowing);
|
||
|
|
||
|
// write to 'field' here
|
||
|
// item->save_in_field(field) or something else
|
||
|
|
||
|
// Stop doing narrowing
|
||
|
narrow.stop();
|
||
|
*/
|
||
|
|
||
|
class Utf8_narrow
|
||
|
{
|
||
|
Field *field;
|
||
|
DTCollation save_collation;
|
||
|
|
||
|
public:
|
||
|
static bool should_do_narrowing(const THD *thd, CHARSET_INFO *field_cset,
|
||
|
CHARSET_INFO *value_cset);
|
||
|
|
||
|
static bool should_do_narrowing(const Field *field, CHARSET_INFO *value_cset)
|
||
|
{
|
||
|
CHARSET_INFO *field_cset= field->charset();
|
||
|
THD *thd= field->table->in_use;
|
||
|
return should_do_narrowing(thd, field_cset, value_cset);
|
||
|
}
|
||
|
|
||
|
Utf8_narrow(Field *field_arg, bool is_applicable)
|
||
|
{
|
||
|
field= NULL;
|
||
|
if (is_applicable)
|
||
|
{
|
||
|
DTCollation mb3_from_mb4= utf8mb3_from_mb4.charset();
|
||
|
field= field_arg;
|
||
|
save_collation= field->dtcollation();
|
||
|
field->change_charset(mb3_from_mb4);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void stop()
|
||
|
{
|
||
|
if (field)
|
||
|
field->change_charset(save_collation);
|
||
|
#ifndef NDEBUG
|
||
|
field= NULL;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
~Utf8_narrow()
|
||
|
{
|
||
|
DBUG_ASSERT(!field);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
|
||
|
/*
|
||
|
@brief
|
||
|
Check if two fields can participate in a multiple equality using charset
|
||
|
narrowing.
|
||
|
|
||
|
@detail
|
||
|
Normally, check_simple_equality() checks this by calling:
|
||
|
|
||
|
left_field->eq_def(right_field)
|
||
|
|
||
|
This function does the same but takes into account we might use charset
|
||
|
narrowing:
|
||
|
- collations are not the same but rather an utf8mb{3,4}_general_ci pair
|
||
|
- for field lengths, should compare # characters, not #bytes.
|
||
|
*/
|
||
|
|
||
|
inline
|
||
|
bool fields_equal_using_narrowing(const THD *thd, const Field *left, const Field *right)
|
||
|
{
|
||
|
return
|
||
|
dynamic_cast<const Field_longstr*>(left) &&
|
||
|
dynamic_cast<const Field_longstr*>(right) &&
|
||
|
left->real_type() == right->real_type() &&
|
||
|
(Utf8_narrow::should_do_narrowing(left, right->charset()) ||
|
||
|
Utf8_narrow::should_do_narrowing(right, left->charset())) &&
|
||
|
left->char_length() == right->char_length();
|
||
|
};
|
||
|
|
||
|
|
||
|
#endif /* CSET_NARROWING_H_INCLUDED */
|