From 0892e6d028f7b08da938defd9443ecbb45cda612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 9 Apr 2024 09:32:47 +0300 Subject: [PATCH] MDEV-33585 The maximum innodb_log_buffer_size is too large On Microsoft Windows, ReadFile() as well as WriteFile() limit the size of the request to DWORD, which is 32 bits (at most 4 GiB - 1) also on 64-bit systems. On FreeBSD, sysctl debug.iosize_max_clamp could limit the size of a write request to INT_MAX. The size of a read request is always limited to INT_MAX. This would allow the request size to be 4095 bytes more than the Linux limit (0x7ffff000 according to "man 2 read" and "man 2 write"). On OpenBSD, Solaris and possibly NetBSD, the read request size is limited to SSIZE_T_MAX, which would be half the current maximum innodb_log_buffer_size. This should be not much of an issue anyway, because on contemporary 64-bit platforms, the virtual addresses are limited to 48 bits. IBM AIX documentation mentions OFF_MAX which would apply when a 64-bit application is running on a 32-bit kernel. Let us declare innodb_log_buffer_size as 32-bit unsigned and make the maximum 0x7ffff000, to be compatible with the least common denominator (Linux). The maximum innodb_sort_buffer_size already was 64 MiB, which is not a problem. SyncFileIO::execute(): Assert that the size of a synchronous read or write request is limited to the maximum. Reviewed by: Vladislav Vaintroub --- extra/mariabackup/xtrabackup.cc | 4 +- .../sys_vars/r/sysvars_innodb,32bit.rdiff | 77 ++++++++----------- .../suite/sys_vars/r/sysvars_innodb.result | 4 +- storage/innobase/handler/ha_innodb.cc | 4 +- storage/innobase/include/log0log.h | 5 +- storage/innobase/include/os0file.h | 12 +++ storage/innobase/os/os0file.cc | 2 + 7 files changed, 57 insertions(+), 51 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index c53ff0792c3..f9993965092 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -1888,8 +1888,8 @@ struct my_option xb_server_options[] = {"innodb_log_buffer_size", OPT_INNODB_LOG_BUFFER_SIZE, "Redo log buffer size in bytes.", (G_PTR*) &log_sys.buf_size, (G_PTR*) &log_sys.buf_size, 0, - IF_WIN(GET_ULL,GET_ULONG), REQUIRED_ARG, 2U << 20, - 2U << 20, SIZE_T_MAX, 0, 4096, 0}, + GET_UINT, REQUIRED_ARG, 2U << 20, + 2U << 20, log_sys.buf_size_max, 0, 4096, 0}, #if defined __linux__ || defined _WIN32 {"innodb_log_file_buffering", OPT_INNODB_LOG_FILE_BUFFERING, "Whether the file system cache for ib_logfile0 is enabled during --backup", diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff index 9ae5049be2d..81db8629d19 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff @@ -1,4 +1,6 @@ -@@ -49,7 +49,7 @@ +--- sysvars_innodb.result ++++ sysvars_innodb.result,32bit +@@ -47,7 +47,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 8 VARIABLE_SCOPE GLOBAL @@ -7,7 +9,7 @@ VARIABLE_COMMENT Number of InnoDB Adaptive Hash Index Partitions (default 8) NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 512 -@@ -73,7 +73,7 @@ +@@ -71,7 +71,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -16,7 +18,7 @@ VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility); 1 => New style AUTOINC locking; 2 => No AUTOINC locking (unsafe for SBR) NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -85,10 +85,10 @@ +@@ -83,10 +83,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -29,7 +31,7 @@ NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -121,7 +121,7 @@ +@@ -119,7 +119,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 25 VARIABLE_SCOPE GLOBAL @@ -38,7 +40,7 @@ VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 25 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 100 -@@ -205,7 +205,7 @@ +@@ -203,7 +203,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -47,7 +49,7 @@ VARIABLE_COMMENT A number between [0, 100] that tells how oftern buffer pool dump status in percentages should be printed. E.g. 10 means that buffer pool dump status is printed when every 10% of number of buffer pool pages are dumped. Default is 0 (only start and end status is printed). NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -325,7 +325,7 @@ +@@ -323,7 +323,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 5 VARIABLE_SCOPE GLOBAL @@ -56,7 +58,7 @@ VARIABLE_COMMENT If the compression failure rate of a table is greater than this number more padding is added to the pages to reduce the failures. A value of zero implies no padding NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -349,7 +349,7 @@ +@@ -347,7 +347,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 50 VARIABLE_SCOPE GLOBAL @@ -65,7 +67,7 @@ VARIABLE_COMMENT Percentage of empty space on a data page that can be reserved to make the page compressible. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 75 -@@ -625,7 +625,7 @@ +@@ -623,7 +623,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 600 VARIABLE_SCOPE GLOBAL @@ -74,7 +76,7 @@ VARIABLE_COMMENT Maximum number of seconds that semaphore times out in InnoDB. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 4294967295 -@@ -673,7 +673,7 @@ +@@ -671,7 +671,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -83,7 +85,7 @@ VARIABLE_COMMENT Number of iterations over which the background flushing is averaged. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1000 -@@ -697,7 +697,7 @@ +@@ -695,7 +695,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -92,7 +94,7 @@ VARIABLE_COMMENT Controls the durability/speed trade-off for commits. Set to 0 (write and flush redo log to disk only once per second), 1 (flush to disk at each commit), 2 (write to log at commit but flush to disk only once per second) or 3 (flush to disk at prepare and at commit, slower and usually redundant). 1 and 3 guarantees that after a crash, committed transactions will not be lost and will be consistent with the binlog and other transactional engines. 2 can get inconsistent and lose transactions if there is a power failure or kernel crash but not if mysqld crashes. 0 has no guarantees in case of crash. 0 and 2 can be faster than 1 or 3. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 3 -@@ -721,7 +721,7 @@ +@@ -719,7 +719,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -101,7 +103,7 @@ VARIABLE_COMMENT Set to 0 (don't flush neighbors from buffer pool), 1 (flush contiguous neighbors from buffer pool) or 2 (flush neighbors from buffer pool), when flushing a block NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -757,7 +757,7 @@ +@@ -755,7 +755,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -110,7 +112,7 @@ VARIABLE_COMMENT Helps to save your data in case the disk image of the database becomes corrupt. Value 5 can return bogus data, and 6 can permanently corrupt data. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 6 -@@ -781,10 +781,10 @@ +@@ -779,10 +779,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 8000000 VARIABLE_SCOPE GLOBAL @@ -123,7 +125,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -817,7 +817,7 @@ +@@ -815,7 +815,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 84 VARIABLE_SCOPE GLOBAL @@ -132,7 +134,7 @@ VARIABLE_COMMENT InnoDB Fulltext search maximum token size in characters NUMERIC_MIN_VALUE 10 NUMERIC_MAX_VALUE 84 -@@ -829,7 +829,7 @@ +@@ -827,7 +827,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 3 VARIABLE_SCOPE GLOBAL @@ -141,7 +143,7 @@ VARIABLE_COMMENT InnoDB Fulltext search minimum token size in characters NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 16 -@@ -841,7 +841,7 @@ +@@ -839,7 +839,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000 VARIABLE_SCOPE GLOBAL @@ -150,7 +152,7 @@ VARIABLE_COMMENT InnoDB Fulltext search number of words to optimize for each optimize table call NUMERIC_MIN_VALUE 1000 NUMERIC_MAX_VALUE 10000 -@@ -853,10 +853,10 @@ +@@ -851,10 +851,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000000000 VARIABLE_SCOPE GLOBAL @@ -163,7 +165,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -877,7 +877,7 @@ +@@ -875,7 +875,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2 VARIABLE_SCOPE GLOBAL @@ -172,7 +174,7 @@ VARIABLE_COMMENT InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 16 -@@ -889,10 +889,10 @@ +@@ -887,10 +887,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 640000000 VARIABLE_SCOPE GLOBAL @@ -185,7 +187,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -937,22 +937,22 @@ +@@ -935,22 +935,22 @@ SESSION_VALUE NULL DEFAULT_VALUE 200 VARIABLE_SCOPE GLOBAL @@ -213,20 +215,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -985,10 +985,10 @@ - SESSION_VALUE NULL - DEFAULT_VALUE 16777216 - VARIABLE_SCOPE GLOBAL --VARIABLE_TYPE BIGINT UNSIGNED -+VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Redo log buffer size in bytes. - NUMERIC_MIN_VALUE 2097152 --NUMERIC_MAX_VALUE 18446744073709551615 -+NUMERIC_MAX_VALUE 4294967295 - NUMERIC_BLOCK_SIZE 4096 - ENUM_VALUE_LIST NULL - READ_ONLY YES -@@ -1033,10 +1033,10 @@ +@@ -1043,10 +1043,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 32 VARIABLE_SCOPE GLOBAL @@ -239,7 +228,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1045,10 +1045,10 @@ +@@ -1055,10 +1055,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 1536 VARIABLE_SCOPE GLOBAL @@ -252,7 +241,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1081,10 +1081,10 @@ +@@ -1091,10 +1091,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -265,7 +254,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1093,7 +1093,7 @@ +@@ -1103,7 +1103,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -274,7 +263,7 @@ VARIABLE_COMMENT Maximum delay of user threads in micro-seconds NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 10000000 -@@ -1225,10 +1225,10 @@ +@@ -1235,10 +1235,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -287,7 +276,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1249,7 +1249,7 @@ +@@ -1259,7 +1259,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 16384 VARIABLE_SCOPE GLOBAL @@ -296,7 +285,7 @@ VARIABLE_COMMENT Page size to use for all InnoDB tablespaces. NUMERIC_MIN_VALUE 4096 NUMERIC_MAX_VALUE 65536 -@@ -1285,7 +1285,7 @@ +@@ -1295,7 +1295,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1000 VARIABLE_SCOPE GLOBAL @@ -305,7 +294,7 @@ VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 5000 -@@ -1297,7 +1297,7 @@ +@@ -1307,7 +1307,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL @@ -314,7 +303,7 @@ VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 128 -@@ -1333,7 +1333,7 @@ +@@ -1343,7 +1343,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 56 VARIABLE_SCOPE GLOBAL @@ -323,7 +312,7 @@ VARIABLE_COMMENT Number of pages that must be accessed sequentially for InnoDB to trigger a readahead. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -@@ -1405,7 +1405,7 @@ +@@ -1427,7 +1427,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1048576 VARIABLE_SCOPE GLOBAL @@ -332,7 +321,7 @@ VARIABLE_COMMENT Memory buffer size for index creation NUMERIC_MIN_VALUE 65536 NUMERIC_MAX_VALUE 67108864 -@@ -1573,10 +1573,10 @@ +@@ -1595,10 +1595,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 623a32b640a..6a7e184f68e 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -983,10 +983,10 @@ VARIABLE_NAME INNODB_LOG_BUFFER_SIZE SESSION_VALUE NULL DEFAULT_VALUE 16777216 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Redo log buffer size in bytes. NUMERIC_MIN_VALUE 2097152 -NUMERIC_MAX_VALUE 18446744073709551615 +NUMERIC_MAX_VALUE 2147479552 NUMERIC_BLOCK_SIZE 4096 ENUM_VALUE_LIST NULL READ_ONLY YES diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 13f46d8582a..9dbee523536 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -19316,10 +19316,10 @@ static MYSQL_SYSVAR_ULONG(page_size, srv_page_size, NULL, NULL, UNIV_PAGE_SIZE_DEF, UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX, 0); -static MYSQL_SYSVAR_SIZE_T(log_buffer_size, log_sys.buf_size, +static MYSQL_SYSVAR_UINT(log_buffer_size, log_sys.buf_size, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Redo log buffer size in bytes.", - NULL, NULL, 16U << 20, 2U << 20, SIZE_T_MAX, 4096); + NULL, NULL, 16U << 20, 2U << 20, log_sys.buf_size_max, 4096); #if defined __linux__ || defined _WIN32 static MYSQL_SYSVAR_BOOL(log_file_buffering, log_sys.log_buffered, diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 48ac99fb817..f553b9bddd4 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -132,6 +132,9 @@ public: /** Redo log buffer */ struct log_t { + /** The maximum buf_size */ + static constexpr unsigned buf_size_max= os_file_request_size_max; + /** The original (not version-tagged) InnoDB redo log format */ static constexpr uint32_t FORMAT_3_23= 0; /** The MySQL 5.7.9/MariaDB 10.2.2 log format */ @@ -187,7 +190,7 @@ public: /** number of append_prepare_wait(); protected by lock_lsn() or lsn_lock */ size_t waits; /** innodb_log_buffer_size (size of buf,flush_buf if !is_pmem(), in bytes) */ - size_t buf_size; + unsigned buf_size; /** log file size in bytes, including the header */ lsn_t file_size; diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index a5953dcfd51..7eba359fbd3 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -46,6 +46,18 @@ Created 10/21/1995 Heikki Tuuri #include #endif /* !_WIN32 */ +/** The maximum size of a read or write request. + +According to Linux "man 2 read" and "man 2 write" this applies to +both 32-bit and 64-bit systems. + +On FreeBSD, the limit is close to the Linux one, INT_MAX. + +On Microsoft Windows, the limit is UINT_MAX (4 GiB - 1). + +On other systems, the limit typically is up to SSIZE_T_MAX. */ +static constexpr unsigned os_file_request_size_max= 0x7ffff000; + extern bool os_has_said_disk_full; /** File offset in bytes */ diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 04835377f8a..672902a0d68 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -678,6 +678,8 @@ ssize_t SyncFileIO::execute(const IORequest& request) { ssize_t n_bytes; + ut_ad(m_n > 0); + ut_ad(m_n <= os_file_request_size_max); if (request.is_read()) { #ifdef _WIN32