MDEV-26713 set console codepage to what user set in --default-character-set

If someone, for whatever reason, uses --default-character-set=cp850,
this will avoid incorrect display and the insertion of incorrect data.

Adjusting the console codepage sometimes also needs to happen with
--default-character-set=auto on older Windows. This is because autodetection
is not always exact. For example, console codepage on US editions of
Windows is 437. Client autodetects it as cp850, a rather loose
approximation, given 46 code point differences. We change the console
codepage to cp850, so that there is no discrepancy.

This fix is currently Windows-only, and serves people who used chcp in
combination with --default-character-set to achieve a WYSIWYG effect
(although this would most likely have been used with utf8 in the past).

Now, --default-character-set would be a replacement for that.

Fix fs_character_set() detection of current codepage.
This commit is contained in:
Vladislav Vaintroub 2021-11-29 19:47:36 +01:00 committed by Sergei Golubchik
parent 74f2e6c85e
commit 9ea83f7fbd
7 changed files with 121 additions and 23 deletions

View file

@ -3304,6 +3304,21 @@ com_clear(String *buffer,char *line __attribute__((unused)))
return 0;
}
/**
  Make the Windows console codepage match the given character set.

  On Windows, calls my_set_console_cp() and, if that reports a failure,
  prints a warning that names the charset and the console output codepage
  that stays in effect. On other platforms this function does nothing.

  @param name  MariaDB character set name.
*/
static void adjust_console_codepage(const char *name __attribute__((unused)))
{
#ifdef _WIN32
  if (my_set_console_cp(name) < 0)
  {
    char buf[128];
    /*
      The two literals below are concatenated by the compiler, so the
      separating space after the comma has to be part of the first one.
    */
    snprintf(buf, sizeof(buf),
             "WARNING: Could not determine Windows codepage for charset '%s', "
             "continue using codepage %u", name, GetConsoleOutputCP());
    put_info(buf, INFO_INFO);
  }
#endif
}
/* ARGSUSED */
static int
com_charset(String *buffer __attribute__((unused)), char *line)
@ -3325,6 +3340,7 @@ com_charset(String *buffer __attribute__((unused)), char *line)
mysql_set_character_set(&mysql, charset_info->cs_name.str);
default_charset= (char *)charset_info->cs_name.str;
put_info("Charset changed", INFO_INFO);
adjust_console_codepage(charset_info->cs_name.str);
}
else put_info("Charset is not found", INFO_INFO);
return 0;
@ -4873,6 +4889,7 @@ sql_real_connect(char *host,char *database,char *user,char *password,
put_info(buff, INFO_ERROR);
return 1;
}
adjust_console_codepage(charset_info->cs_name.str);
connected=1;
#ifndef EMBEDDED_LIBRARY
mysql_options(&mysql, MYSQL_OPT_RECONNECT, &debug_info_flag);

View file

@ -438,6 +438,7 @@ int main(int argc,char *argv[])
mysql_options(&mysql,MYSQL_OPT_PROTOCOL,(char*)&opt_protocol);
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
my_set_console_cp(default_charset);
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
error_flags= (myf)(opt_nobeep ? 0 : ME_BELL);

View file

@ -503,6 +503,7 @@ static int get_options(int *argc, char ***argv)
printf("Unsupported character set: %s\n", default_charset);
DBUG_RETURN(1);
}
my_set_console_cp(default_charset);
if (*argc > 0 && opt_alldbs)
{
printf("You should give only options, no arguments at all, with option\n");

View file

@ -525,6 +525,7 @@ static MYSQL *db_connect(char *host, char *database,
mysql_options(mysql, MYSQL_DEFAULT_AUTH, opt_default_auth);
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
my_set_console_cp(default_charset);
mysql_options(mysql, MYSQL_SET_CHARSET_NAME, my_default_csname());
mysql_options(mysql, MYSQL_OPT_CONNECT_ATTR_RESET, 0);
mysql_options4(mysql, MYSQL_OPT_CONNECT_ATTR_ADD,

View file

@ -147,6 +147,7 @@ int main(int argc, char **argv)
if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME))
default_charset= (char *)my_default_csname();
my_set_console_cp(default_charset);
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset);
if (opt_plugin_dir && *opt_plugin_dir)

View file

@ -1086,6 +1086,9 @@ extern char *get_tty_password(const char *opt_message);
#define BACKSLASH_MBTAIL
/* File system character set */
extern CHARSET_INFO *fs_character_set(void);
extern int my_set_console_cp(const char *name);
#else
#define my_set_console_cp(A) do {} while (0)
#endif
extern const char *my_default_csname(void);
extern size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,

View file

@ -1209,30 +1209,17 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
#ifdef BACKSLASH_MBTAIL
/**
  Return the character set used for file system names.

  The result is determined on the first call and cached in a function-local
  static, so later calls return the same pointer.

  The charset is chosen by comparing the ANSI codepage number reported by
  GetACP() instead of going through get_charset_by_name(): this function
  is executed before the LOCK_THD_charset mutex, which get_charset_by_name()
  uses, is initialized. Only cp932 is of interest here; every other codepage
  maps to the binary charset.

  @return &my_charset_cp932_japanese_ci if cp932 support is compiled in and
          the ANSI codepage is 932, otherwise &my_charset_bin.
*/
CHARSET_INFO *fs_character_set()
{
  static CHARSET_INFO *fs_cset_cache;

  if (!fs_cset_cache)
  {
#ifdef HAVE_CHARSET_cp932
    if (GetACP() == 932)
      fs_cset_cache= &my_charset_cp932_japanese_ci;
    else
#endif
      fs_cset_cache= &my_charset_bin;
  }
  return fs_cset_cache;
}
#endif
@ -1394,7 +1381,7 @@ static const MY_CSET_OS_NAME charsets[] =
{"cp54936", "gb18030", my_cs_exact},
#endif
{"cp65001", "utf8mb4", my_cs_exact},
{"cp65001", "utf8mb3", my_cs_approx},
#else /* not Windows */
{"646", "latin1", my_cs_approx}, /* Default on Solaris */
@ -1534,3 +1521,90 @@ const char* my_default_csname()
#endif
return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME;
}
#ifdef _WIN32
/**
Extract codepage number from "cpNNNN" string,
and check that this codepage is supported.
@return 0 - invalid codepage(or unsupported)
> 0 - valid codepage number.
*/
static UINT get_codepage(const char *s)
{
UINT cp;
if (s[0] != 'c' || s[1] != 'p')
{
DBUG_ASSERT(0);
return 0;
}
cp= strtoul(s + 2, NULL, 10);
if (!IsValidCodePage(cp))
{
/*
Can happen also with documented CP, i.e 51936
Perhaps differs from one machine to another.
*/
return 0;
}
return cp;
}
/**
  Map a MariaDB character set name to a Windows codepage number.

  Walks the charsets[] table and looks at every entry whose my_name equals
  my_cs_name (case-insensitively):
   - an exact entry with a usable codepage is returned immediately;
   - the first approximate entry with a usable codepage is remembered and
     returned only if no exact entry is found later in the table;
   - an entry of any other kind ends the search with 0.

  @param my_cs_name  MariaDB character set name.

  @return codepage number, or 0 if no usable codepage was found.
*/
static UINT mysql_charset_to_codepage(const char *my_cs_name)
{
  const MY_CSET_OS_NAME *entry;
  UINT approx_cp= 0;

  for (entry= charsets; entry->os_name; entry++)
  {
    if (strcasecmp(entry->my_name, my_cs_name) != 0)
      continue;                       /* different charset, keep looking */

    if (entry->param == my_cs_exact)
    {
      UINT exact_cp= get_codepage(entry->os_name);
      if (exact_cp)
        return exact_cp;
    }
    else if (entry->param == my_cs_approx)
    {
      /* Keep the first approximate hit; an exact one may still follow. */
      if (!approx_cp)
        approx_cp= get_codepage(entry->os_name);
    }
    else
      return 0;
  }
  return approx_cp;
}
/**
  Set the console codepage for MariaDB's charset name.

  Does nothing when stdout has no valid file descriptor or is not a
  terminal. Otherwise the charset name is translated to a codepage, and the
  console output and input codepages are each switched to it unless they
  already have that value.

  @param csname  MariaDB character set name.

  @return  0 - stdout is not a terminal, or both codepages now match
          -1 - no compatible OS codepage, or changing a codepage failed
*/
int my_set_console_cp(const char *csname)
{
  int out_fd= fileno(stdout);
  UINT wanted_cp;

  if (out_fd < 0 || !isatty(out_fd))
    return 0;

  wanted_cp= mysql_charset_to_codepage(csname);
  if (wanted_cp == 0)
    return -1;                        /* No compatible os charset. */

  if (GetConsoleOutputCP() != wanted_cp)
  {
    if (!SetConsoleOutputCP(wanted_cp))
      return -1;
  }
  if (GetConsoleCP() != wanted_cp)
  {
    if (!SetConsoleCP(wanted_cp))
      return -1;
  }
  return 0;
}
#endif