From ba9d231b5ab75bc3614e53bdd95026e5fe9dd565 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 22 Nov 2021 12:29:15 +0100 Subject: [PATCH] MDEV-26713 Set activeCodePage=UTF8 for windows programs - Use corresponding entry in the manifest, as described in https://docs.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page - If ANSI codepage is UTF8 (i.e. for Windows 10 1903 and later) Use UTF8 as default client charset Set console codepage(s) to UTF8, in case process is using console - Allow some previously disabled MTR tests, that used Unicode in "exec", for the recent Windows versions --- cmake/win_compatibility.manifest | 5 ++ mysql-test/include/check_utf8_cli.inc | 3 + mysql-test/include/no_utf8_cli.inc | 3 + mysql-test/main/charset_client_win.test | 1 + .../main/charset_client_win_utf8mb4.result | 2 + .../main/charset_client_win_utf8mb4.test | 3 + ...t_windows.result => grant_utf8_cli.result} | 0 ...t_not_windows.test => grant_utf8_cli.test} | 3 +- mysql-test/suite.pm | 22 ++++++ mysys/charset.c | 12 ++- mysys/get_password.c | 2 +- mysys/my_getopt.c | 53 ++++++++++++- mysys/my_init.c | 75 +++++++++++++++++++ 13 files changed, 174 insertions(+), 10 deletions(-) create mode 100644 mysql-test/include/check_utf8_cli.inc create mode 100644 mysql-test/include/no_utf8_cli.inc create mode 100644 mysql-test/main/charset_client_win_utf8mb4.result create mode 100644 mysql-test/main/charset_client_win_utf8mb4.test rename mysql-test/main/{grant_not_windows.result => grant_utf8_cli.result} (100%) rename mysql-test/main/{grant_not_windows.test => grant_utf8_cli.test} (82%) diff --git a/cmake/win_compatibility.manifest b/cmake/win_compatibility.manifest index 2e4b27a6dc4..0e7ce667d68 100644 --- a/cmake/win_compatibility.manifest +++ b/cmake/win_compatibility.manifest @@ -19,4 +19,9 @@ + + + UTF-8 + + diff --git a/mysql-test/include/check_utf8_cli.inc b/mysql-test/include/check_utf8_cli.inc new file mode 100644 index 00000000000..a1fac216446 --- 
/dev/null +++ b/mysql-test/include/check_utf8_cli.inc @@ -0,0 +1,3 @@ +# Check if utf8 can be used on the command line for --exec +# The real check is done in the suite.pm +# diff --git a/mysql-test/include/no_utf8_cli.inc b/mysql-test/include/no_utf8_cli.inc new file mode 100644 index 00000000000..19f9aa6df42 --- /dev/null +++ b/mysql-test/include/no_utf8_cli.inc @@ -0,0 +1,3 @@ +# Check if utf8 can't be used on the command line for --exec +# The real check is done in the suite.pm +# diff --git a/mysql-test/main/charset_client_win.test b/mysql-test/main/charset_client_win.test index b4a21d4f0a4..c3f649cb7d4 100644 --- a/mysql-test/main/charset_client_win.test +++ b/mysql-test/main/charset_client_win.test @@ -1,2 +1,3 @@ --source include/windows.inc +--source include/no_utf8_cli.inc --exec chcp 1257 > NUL && $MYSQL --default-character-set=auto -e "select @@character_set_client" diff --git a/mysql-test/main/charset_client_win_utf8mb4.result b/mysql-test/main/charset_client_win_utf8mb4.result new file mode 100644 index 00000000000..f7b5d376f9a --- /dev/null +++ b/mysql-test/main/charset_client_win_utf8mb4.result @@ -0,0 +1,2 @@ +@@character_set_client +utf8mb4 diff --git a/mysql-test/main/charset_client_win_utf8mb4.test b/mysql-test/main/charset_client_win_utf8mb4.test new file mode 100644 index 00000000000..2baf0d7c050 --- /dev/null +++ b/mysql-test/main/charset_client_win_utf8mb4.test @@ -0,0 +1,3 @@ +--source include/windows.inc +--source include/check_utf8_cli.inc +--exec $MYSQL --default-character-set=auto -e "select @@character_set_client" diff --git a/mysql-test/main/grant_not_windows.result b/mysql-test/main/grant_utf8_cli.result similarity index 100% rename from mysql-test/main/grant_not_windows.result rename to mysql-test/main/grant_utf8_cli.result diff --git a/mysql-test/main/grant_not_windows.test b/mysql-test/main/grant_utf8_cli.test similarity index 82% rename from mysql-test/main/grant_not_windows.test rename to mysql-test/main/grant_utf8_cli.test 
index 55b09232edc..bc811d5298e 100644 --- a/mysql-test/main/grant_not_windows.test +++ b/mysql-test/main/grant_utf8_cli.test @@ -1,6 +1,5 @@ - # UTF8 parameters to mysql client do not work on Windows ---source include/not_windows.inc --source include/not_embedded.inc +--source include/check_utf8_cli.inc # # Bug#21432 Database/Table name limited to 64 bytes, not chars, problems with multi-byte diff --git a/mysql-test/suite.pm b/mysql-test/suite.pm index 4cc6b410fa1..ad67117a229 100644 --- a/mysql-test/suite.pm +++ b/mysql-test/suite.pm @@ -87,6 +87,28 @@ sub skip_combinations { $skip{'main/ssl_verify_ip.test'} = 'x509v3 support required' unless $openssl_ver ge "1.0.2"; + sub utf8_command_line_ok() { + if (IS_WINDOWS) { + # Can use UTF8 on command line since Windows 10 1903 (10.0.18362) + # or if OS codepage is set to UTF8. Version parts are compared as numbers. + my($os_name, $os_major, $os_minor, $os_build, $os_id) = Win32::GetOSVersion(); + if($os_major < 10){ + return 0; + } elsif($os_major > 10 or $os_minor > 0 or $os_build >= 18362){ + return 1; + } elsif(Win32::GetACP() == 65001) { + return 1; + } + return 0; + } + return 1; + } + + $skip{'include/check_utf8_cli.inc'} = 'No utf8 command line support' + unless utf8_command_line_ok(); + + $skip{'include/no_utf8_cli.inc'} = 'Not tested with utf8 command line support' + unless !utf8_command_line_ok(); %skip; } diff --git a/mysys/charset.c b/mysys/charset.c index 19cad76fdf4..2a8ac6e1ca5 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -1517,9 +1517,15 @@ const char* my_default_csname() const char* csname = NULL; #ifdef _WIN32 char cpbuf[64]; - int cp = GetConsoleCP(); - if (cp == 0) - cp = GetACP(); + UINT cp; + if (GetACP() == CP_UTF8) + cp= CP_UTF8; + else + { + cp= GetConsoleCP(); + if (cp == 0) + cp= GetACP(); + } snprintf(cpbuf, sizeof(cpbuf), "cp%d", (int)cp); csname = my_os_charset_to_mysql_charset(cpbuf); #elif defined(HAVE_SETLOCALE) && defined(HAVE_NL_LANGINFO) diff --git a/mysys/get_password.c b/mysys/get_password.c index 
bdd20d0349b..18286fd9e39 100644 --- a/mysys/get_password.c +++ b/mysys/get_password.c @@ -100,7 +100,7 @@ char *get_tty_password(const char *opt_message) /* Allocate output string, and convert UTF16 password to output codepage. */ - cp= GetConsoleCP(); + cp= GetACP() == CP_UTF8 ? CP_UTF8 : GetConsoleCP(); if (!(to_len= WideCharToMultiByte(cp, 0, wbuf, -1, NULL, 0, NULL, NULL))) DBUG_RETURN(NULL); diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 3fe025ba808..6e9c6334620 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -38,7 +38,7 @@ static double getopt_double(char *arg, const struct my_option *optp, int *err); static void init_variables(const struct my_option *, init_func_p); static void init_one_value(const struct my_option *, void *, longlong); static void fini_one_value(const struct my_option *, void *, longlong); -static int setval(const struct my_option *, void *, char *, my_bool); +static int setval(const struct my_option *, void *, char *, my_bool, const char *); static char *check_struct_option(char *cur_arg, char *key_name); /* @@ -133,6 +133,50 @@ double getopt_ulonglong2double(ulonglong v) return u.dbl; } +#ifdef _WIN32 +/** + Warn, via my_getopt_error_reporter, if an option value is not well-formed UTF8. + Checked on Windows only, and only if the program is running in UTF8 mode (GetACP() == CP_UTF8). + @param key option name, @param value option argument, @param filename option file the value came from (NULL or empty if none) + This will most likely be a sign of an old "ANSI" my.ini, and it is likely that + something will go wrong, e.g. a file access error. 
+*/ +static void validate_value(const char *key, const char *value, + const char *filename) +{ + MY_STRCOPY_STATUS status; + const struct charset_info_st *cs= &my_charset_utf8mb4_bin; + size_t len; + if (GetACP() != CP_UTF8) + return; + if (!(len= strlen(value))) + return; + cs->cset->well_formed_char_length(cs, value, value + len, len, &status); + if (!status.m_well_formed_error_pos) + return; + if (filename && *filename) + { + my_getopt_error_reporter(WARNING_LEVEL, + "%s: invalid (non-UTF8) characters found for option '%s'" + " in file '%s'", + my_progname, key, filename); + } + else + { + /* + Should never happen, non-UTF8 can be read from an option + file only. + */ + DBUG_ASSERT(0); + my_getopt_error_reporter( + WARNING_LEVEL, "%s: invalid (non-UTF8) characters for option %s", + my_progname, key); + } +} +#else +#define validate_value(key, value, filename) (void)filename +#endif + /** Handle command line options. Sort options. @@ -564,7 +608,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, } } if ((error= setval(optp, optp->value, argument, - set_maximum_value))) + set_maximum_value,filename))) DBUG_RETURN(error); if (get_one_option(optp, argument, filename)) DBUG_RETURN(EXIT_UNSPECIFIED_ERROR); @@ -610,7 +654,7 @@ int handle_options(int *argc, char ***argv, const struct my_option *longopts, continue; } if ((!option_is_autoset) && - ((error= setval(optp, value, argument, set_maximum_value))) && + ((error= setval(optp, value, argument, set_maximum_value,filename))) && !option_is_loose) DBUG_RETURN(error); if (get_one_option(optp, argument, filename)) @@ -711,7 +755,7 @@ static my_bool get_bool_argument(const struct my_option *opts, */ static int setval(const struct my_option *opts, void *value, char *argument, - my_bool set_maximum_value) + my_bool set_maximum_value, const char *option_file) { int err= 0, res= 0; DBUG_ENTER("setval"); @@ -858,6 +902,7 @@ static int setval(const struct my_option *opts, void *value, char 
*argument, goto ret; }; } + validate_value(opts->name, argument, option_file); DBUG_RETURN(0); ret: diff --git a/mysys/my_init.c b/mysys/my_init.c index d201d45a4ee..2f21bcb735f 100644 --- a/mysys/my_init.c +++ b/mysys/my_init.c @@ -34,6 +34,7 @@ #endif static void my_win_init(void); static my_bool win32_init_tcp_ip(); +static void setup_codepages(); #else #define my_win_init() #endif @@ -67,6 +68,69 @@ static ulong atoi_octal(const char *str) MYSQL_FILE *mysql_stdin= NULL; static MYSQL_FILE instrumented_stdin; +#ifdef _WIN32 +static UINT orig_console_cp, orig_console_output_cp; + +static void reset_console_cp(void) +{ + /* Restore the console codepages saved by setup_codepages() (registered there via atexit). + We try not to call SetConsoleCP unnecessarily, to work around a bug on + older Windows 10 (1803), which could switch truetype console fonts to + raster, even though SetConsoleCP would be a no-op (switch from UTF8 to UTF8). + */ + if (GetConsoleCP() != orig_console_cp) + SetConsoleCP(orig_console_cp); + if (GetConsoleOutputCP() != orig_console_output_cp) + SetConsoleOutputCP(orig_console_output_cp); +} + +/* + The below fixes discrepancies in console output and + command line parameter encoding. The command line is in the + ANSI codepage, output to console by default is in OEM, but + we like them to be in the same encoding. + + We do this only if the current ANSI codepage is UTF8, i.e. when we + know we're on Windows that can handle UTF8 well. +*/ +static void setup_codepages() +{ + UINT acp; + BOOL is_a_tty= fileno(stdout) >= 0 && isatty(fileno(stdout)); + + if (is_a_tty) + { + /* + Save console codepages, in case we change them, + to restore them on exit. 
+ */ + orig_console_cp= GetConsoleCP(); + orig_console_output_cp= GetConsoleOutputCP(); + if (orig_console_cp && orig_console_output_cp) + atexit(reset_console_cp); + } + + if ((acp= GetACP()) != CP_UTF8) + return; + + /* + Use setlocale to make mbstowcs/mkdir/getcwd behave, see + https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale + */ + setlocale(LC_ALL, "en_US.UTF8"); + + if (is_a_tty && (orig_console_cp != acp || orig_console_output_cp != acp)) + { + /* + If ANSI codepage is UTF8, we actually want to switch console + to it as well. + */ + SetConsoleCP(acp); + SetConsoleOutputCP(acp); + } +} +#endif + /** Initialize my_sys functions, resources and variables @@ -337,6 +401,17 @@ static void my_win_init(void) _tzset(); + /* + We do not want text translation (LF->CRLF) + when stdout/stderr is a console/terminal, it is buggy + */ + if (fileno(stdout) >= 0 && isatty(fileno(stdout))) + (void)setmode(fileno(stdout), O_BINARY); + + if (fileno(stderr) >= 0 && isatty(fileno(stderr))) + (void) setmode(fileno(stderr), O_BINARY); + + setup_codepages(); DBUG_VOID_RETURN; }