MDEV-26713 Set activeCodePage=UTF8 for windows programs

- Use corresponding entry in the manifest, as described in
https://docs.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page

- If the ANSI codepage is UTF8 (i.e. on Windows 10 1903 and later)
  Use UTF8 as the default client charset
  Set the console codepage(s) to UTF8, in case the process is using a console

- Allow some previously disabled MTR tests, which used Unicode in "exec",
  to run on recent Windows versions
This commit is contained in:
Vladislav Vaintroub 2021-11-22 12:29:15 +01:00 committed by Sergei Golubchik
parent 4d3ac32848
commit ba9d231b5a
13 changed files with 174 additions and 10 deletions

View file

@ -19,4 +19,9 @@
</application>
</compatibility>
<application>
<windowsSettings>
<activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
</windowsSettings>
</application>
</asmv1:assembly>

View file

@ -0,0 +1,3 @@
# Check if utf8 can be used on the command line for --exec
# The real check is done in the suite.pm
#

View file

@ -0,0 +1,3 @@
# Check if utf8 can't be used on the command line for --exec
# The real check is done in the suite.pm
#

View file

@ -1,2 +1,3 @@
--source include/windows.inc
--source include/no_utf8_cli.inc
--exec chcp 1257 > NUL && $MYSQL --default-character-set=auto -e "select @@character_set_client"

View file

@ -0,0 +1,2 @@
@@character_set_client
utf8mb4

View file

@ -0,0 +1,3 @@
--source include/windows.inc
--source include/check_utf8_cli.inc
--exec $MYSQL --default-character-set=auto -e "select @@character_set_client"

View file

@ -1,6 +1,5 @@
# UTF8 parameters to mysql client do not work on Windows
--source include/not_windows.inc
--source include/not_embedded.inc
--source include/check_utf8_cli.inc
#
# Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte

View file

@ -87,6 +87,28 @@ sub skip_combinations {
$skip{'main/ssl_verify_ip.test'} = 'x509v3 support required'
unless $openssl_ver ge "1.0.2";
# Tell whether UTF8 text can be used on the command line of --exec.
#
# Returns 1 on non-Windows platforms. On Windows, returns 1 when the OS is
# Windows 10 1903 (10.0.18362) or later, or when the OS is an older
# Windows 10 whose ANSI codepage is set to UTF8 (65001); returns 0 otherwise.
#
# All version fields are compared numerically. String operators (lt/gt/ge)
# would order "6" after "10" and so report pre-Windows-10 systems as capable.
sub utf8_command_line_ok() {
  return 1 unless IS_WINDOWS;

  # Only the numeric major/minor/build fields are needed here.
  my (undef, $os_major, $os_minor, $os_build) = Win32::GetOSVersion();

  # Anything older than Windows 10 has no usable UTF8 command line.
  return 0 if $os_major < 10;

  # Can use UTF8 on command line since Windows 10 1903 (10.0.18362)
  return 1 if $os_major > 10 or $os_minor > 0 or $os_build >= 18362;

  # ... or if OS codepage is set to UTF8
  return 1 if Win32::GetACP() == 65001;

  return 0;
}
$skip{'include/check_utf8_cli.inc'} = 'No utf8 command line support'
unless utf8_command_line_ok();
$skip{'include/no_utf8_cli.inc'} = 'Not tested with utf8 command line support'
unless !utf8_command_line_ok();
%skip;
}

View file

@ -1517,9 +1517,15 @@ const char* my_default_csname()
const char* csname = NULL;
#ifdef _WIN32
char cpbuf[64];
int cp = GetConsoleCP();
if (cp == 0)
cp = GetACP();
UINT cp;
if (GetACP() == CP_UTF8)
cp= CP_UTF8;
else
{
cp= GetConsoleCP();
if (cp == 0)
cp= GetACP();
}
snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp);
csname = my_os_charset_to_mysql_charset(cpbuf);
#elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO)

View file

@ -100,7 +100,7 @@ char *get_tty_password(const char *opt_message)
/*
Allocate output string, and convert UTF16 password to output codepage.
*/
cp= GetConsoleCP();
cp= GetACP() == CP_UTF8 ? CP_UTF8 : GetConsoleCP();
if (!(to_len= WideCharToMultiByte(cp, 0, wbuf, -1, NULL, 0, NULL, NULL)))
DBUG_RETURN(NULL);

View file

@ -38,7 +38,7 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err);
static void init_variables(const struct my_option *, init_func_p);
static void init_one_value(const struct my_option *, void *, longlong);
static void fini_one_value(const struct my_option *, void *, longlong);
static int setval(const struct my_option *, void *, char *, my_bool);
static int setval(const struct my_option *, void *, char *, my_bool, const char *);
static char *check_struct_option(char *cur_arg, char *key_name);
/*
@ -133,6 +133,50 @@ double getopt_ulonglong2double(ulonglong v)
return u.dbl;
}
#ifdef _WIN32
/**
  Warn about an option value that is not well-formed UTF8.

  The check is done only when the ANSI codepage of the process is UTF8.
  A non-UTF8 value in that mode is most likely a sign of an old "ANSI"
  my.ini, and it is likely that something will go wrong later on,
  e.g a file access error.

  @param key       option name, printed in the warning
  @param value     option value to check
  @param filename  option file the value came from; NULL or empty
                   if the value did not come from a file
*/
static void validate_value(const char *key, const char *value,
                           const char *filename)
{
  const struct charset_info_st *utf8_cs= &my_charset_utf8mb4_bin;
  MY_STRCOPY_STATUS copy_status;
  size_t value_len;

  /* Nothing to validate unless the program runs in UTF8 mode. */
  if (GetACP() != CP_UTF8)
    return;

  value_len= strlen(value);
  if (value_len == 0)
    return;

  /* Only the error position reported in copy_status is of interest. */
  utf8_cs->cset->well_formed_char_length(utf8_cs, value, value + value_len,
                                         value_len, &copy_status);
  if (!copy_status.m_well_formed_error_pos)
    return;

  if (!filename || !*filename)
  {
    /*
      Not expected to be reached: non-UTF8 data can only be read
      from an option file.
    */
    DBUG_ASSERT(0);
    my_getopt_error_reporter(
        WARNING_LEVEL, "%s: invalid (non-UTF8) characters for option %s",
        my_progname, key);
    return;
  }

  my_getopt_error_reporter(WARNING_LEVEL,
                           "%s: invalid (non-UTF8) characters found for option '%s'"
                           " in file '%s'",
                           my_progname, key, filename);
}
#else
#define validate_value(key, value, filename) (void)filename
#endif
/**
Handle command line options.
Sort options.
@ -564,7 +608,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
}
}
if ((error= setval(optp, optp->value, argument,
set_maximum_value)))
set_maximum_value,filename)))
DBUG_RETURN(error);
if (get_one_option(optp, argument, filename))
DBUG_RETURN(EXIT_UNSPECIFIED_ERROR);
@ -610,7 +654,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts,
continue;
}
if ((!option_is_autoset) &&
((error= setval(optp, value, argument, set_maximum_value))) &&
((error= setval(optp, value, argument, set_maximum_value,filename))) &&
!option_is_loose)
DBUG_RETURN(error);
if (get_one_option(optp, argument, filename))
@ -711,7 +755,7 @@ static my_bool get_bool_argument(const struct my_option *opts,
*/
static int setval(const struct my_option *opts, void *value, char *argument,
my_bool set_maximum_value)
my_bool set_maximum_value, const char *option_file)
{
int err= 0, res= 0;
DBUG_ENTER("setval");
@ -858,6 +902,7 @@ static int setval(const struct my_option *opts, void *value, char *argument,
goto ret;
};
}
validate_value(opts->name, argument, option_file);
DBUG_RETURN(0);
ret:

View file

@ -34,6 +34,7 @@
#endif
static void my_win_init(void);
static my_bool win32_init_tcp_ip();
static void setup_codepages();
#else
#define my_win_init()
#endif
@ -67,6 +68,69 @@ static ulong atoi_octal(const char *str)
MYSQL_FILE *mysql_stdin= NULL;
static MYSQL_FILE instrumented_stdin;
#ifdef _WIN32
static UINT orig_console_cp, orig_console_output_cp;
static void reset_console_cp(void)
{
/*
We try not to call SetConsoleCP unnecessarily, to workaround a bug on
older Windows 10 (1803), which could switch truetype console fonts to
raster, eventhough SetConsoleCP would be a no-op (switch from UTF8 to UTF8).
*/
if (GetConsoleCP() != orig_console_cp)
SetConsoleCP(orig_console_cp);
if (GetConsoleOutputCP() != orig_console_output_cp)
SetConsoleOutputCP(orig_console_output_cp);
}
/*
  Align the console codepages with the ANSI codepage when that is UTF8.

  Command line parameters arrive in the ANSI codepage, while console
  output is by default in the OEM codepage; we want both to use the same
  encoding. This is done only if the ANSI codepage is UTF8, i.e when we
  know we are on a Windows that can handle UTF8 well.

  When stdout is a terminal, the current console codepages are saved
  first, and reset_console_cp() is registered with atexit() if both of
  them are non-zero.
*/
static void setup_codepages()
{
  const int stdout_fd= fileno(stdout);
  const BOOL stdout_is_console= stdout_fd >= 0 && isatty(stdout_fd);

  if (stdout_is_console)
  {
    /* Remember what the console used, in case we change it below. */
    orig_console_cp= GetConsoleCP();
    orig_console_output_cp= GetConsoleOutputCP();
    if (orig_console_cp != 0 && orig_console_output_cp != 0)
      atexit(reset_console_cp);
  }

  if (GetACP() != CP_UTF8)
    return;

  /*
    Use setlocale to make mbstowcs/mkdir/getcwd behave, see
    https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale
  */
  setlocale(LC_ALL, "en_US.UTF8");

  if (!stdout_is_console)
    return;

  /* Console already uses UTF8 in both directions, nothing to switch. */
  if (orig_console_cp == CP_UTF8 && orig_console_output_cp == CP_UTF8)
    return;

  /* ANSI codepage is UTF8, so switch the console to it as well. */
  SetConsoleCP(CP_UTF8);
  SetConsoleOutputCP(CP_UTF8);
}
#endif
/**
Initialize my_sys functions, resources and variables
@ -337,6 +401,17 @@ static void my_win_init(void)
_tzset();
/*
We do not want text translation (LF->CRLF)
when stdout is console/terminal, it is buggy
*/
if (fileno(stdout) >= 0 && isatty(fileno(stdout)))
(void)setmode(fileno(stdout), O_BINARY);
if (fileno(stderr) >= 0 && isatty(fileno(stderr)))
(void) setmode(fileno(stderr), O_BINARY);
setup_codepages();
DBUG_VOID_RETURN;
}